402 files changed, 24727 insertions, 4983 deletions
diff --git a/src/blob/blob_fileops.c b/src/blob/blob_fileops.c
new file mode 100644
index 00000000..713e7e83
--- /dev/null
+++ b/src/blob/blob_fileops.c
@@ -0,0 +1,352 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2013, 2015 Oracle and/or its affiliates.  All rights reserved.
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/fop.h"
+
+/*
+ * __blob_file_create --
+ *	Blobs are organized in a directory structure consisting of
+ *	<DB_HOME>/__db_bl/<blob_sub_dir>/.  Below that, the blob_id
+ *	is used to construct a path to the blob file, and to name
+ *	the blob file.  blob_id=1 would result in __db.bl001.
+ *	blob_id=12002 would result in 012/__db.bl012002.
+ *
+ * PUBLIC: int __blob_file_create __P
+ * PUBLIC:  ((DBC *, DB_FH **, db_seq_t *));
+ */
+int
+__blob_file_create(dbc, fhpp, blob_id)
+	DBC *dbc;
+	DB_FH **fhpp;
+	db_seq_t *blob_id;
+{
+	DB  *dbp;
+	DB_FH *fhp;
+	ENV *env;
+	int ret;
+	char *ppath;
+	const char *dir;
+
+	dbp = dbc->dbp;
+	env = dbp->env;
+	fhp = *fhpp = NULL;
+	ppath = NULL;
+	dir = NULL;
+	DB_ASSERT(env, !DB_IS_READONLY(dbc->dbp));
+
+	if ((ret = __blob_generate_id(dbp, dbc->txn, blob_id)) != 0)
+		goto err;
+
+	if ((ret = __blob_id_to_path(
+	    env, dbp->blob_sub_dir, *blob_id, &ppath)) != 0)
+		goto err;
+
+	if ((ret = __fop_create(env, dbc->txn,
+	    &fhp, ppath, &dir, DB_APP_BLOB, env->db_mode,
+	    (F_ISSET(dbc->dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)))
+	    != 0) {
+		__db_errx(env, DB_STR_A("0228",
+		    "Error creating blob file: %llu.", "%llu"),
+		    (unsigned long long)*blob_id);
+		goto err;
+	}
+
+err:	if (ppath != NULL)
+		__os_free(env, ppath);
+	if (ret == 0)
+		*fhpp = fhp;
+	return (ret);
+}
+
+/*
+ * __blob_file_close --
+ *	Close a blob file handle, syncing it first if DB_FOP_WRITE is set.
+ * PUBLIC: int  __blob_file_close __P ((DBC *, DB_FH *, u_int32_t));
+ */
+int
+__blob_file_close(dbc, fhp, flags)
+	DBC *dbc;
+	DB_FH *fhp;
+	u_int32_t flags;
+{
+	ENV *env;
+	int ret, t_ret;
+
+	env = dbc->env;
+	ret = t_ret = 0;
+	if (fhp != NULL) {
+		/* Only sync if open for writing; a sync error wins below. */
+		if (LF_ISSET(DB_FOP_WRITE))
+			t_ret = __os_fsync(env, fhp);
+		ret = __os_closehandle(env, fhp);
+		if (t_ret != 0)
+			ret = t_ret;
+	}
+
+	return (ret);
+}
+
+/*
+ * __blob_file_delete --
+ *	Delete the blob file that blob_id maps to for this database.
+ *
+ * PUBLIC: int __blob_file_delete __P((DBC *, db_seq_t));
+ */
+int
+__blob_file_delete(dbc, blob_id)
+	DBC *dbc;
+	db_seq_t blob_id;
+{
+	ENV *env;
+	char *blob_name, *full_path;
+	int ret;
+
+	env = dbc->dbp->env;
+	blob_name = full_path = NULL;
+
+	if ((ret = __blob_id_to_path(
+	    env, dbc->dbp->blob_sub_dir, blob_id, &blob_name)) != 0) {
+		__db_errx(env, DB_STR_A("0229",
+		   "Failed to construct path for blob file %llu.",
+		   "%llu"), (unsigned long long)blob_id);
+		goto err;
+	}
+
+	/* Without a real txn unlink directly; otherwise use __fop_remove. */
+	if (!IS_REAL_TXN(dbc->txn)) {
+		if ((ret = __db_appname(
+		    env, DB_APP_BLOB, blob_name, NULL, &full_path)) != 0)
+			goto err;
+		ret = __os_unlink(env, full_path, 0);
+	} else {
+		ret = __fop_remove(
+		    env, dbc->txn, NULL, blob_name, NULL, DB_APP_BLOB, 0);
+	}
+
+	if (ret != 0) {
+		__db_errx(env, DB_STR_A("0230",
+		    "Failed to remove blob file while deleting: %s.",
+		    "%s"), blob_name);
+		goto err;
+	}
+
+err:	if (blob_name != NULL)
+		__os_free(env, blob_name);
+	if (full_path != NULL)
+		__os_free(env, full_path);
+	return (ret);
+}
+
+/*
+ * __blob_file_open --
+ *	Open an existing blob file; read-only if asked or if dbp is.
+ * PUBLIC: int __blob_file_open
+ * PUBLIC:	__P((DB *, DB_FH **, db_seq_t, u_int32_t, int));
+ */
+int
+__blob_file_open(dbp, fhpp, blob_id, flags, printerr)
+	DB *dbp;
+	DB_FH **fhpp;
+	db_seq_t blob_id;
+	u_int32_t flags;
+	int printerr;
+{
+	ENV *env;
+	int ret;
+	u_int32_t oflags;
+	char *path, *ppath;
+
+	env = dbp->env;
+	*fhpp = NULL;
+	ppath = path = NULL;
+	oflags = 0;
+
+	if ((ret = __blob_id_to_path(
+	    env, dbp->blob_sub_dir, blob_id, &ppath)) != 0)
+		goto err;
+
+	if ((ret = __db_appname(
+	    env, DB_APP_BLOB, ppath, NULL, &path)) != 0) {
+		__db_errx(env, DB_STR_A("0231",
+		    "Failed to get path to blob file: %llu.", "%llu"),
+		    (unsigned long long)blob_id);
+		goto err;
+	}
+
+	if (LF_ISSET(DB_FOP_READONLY) || DB_IS_READONLY(dbp))
+		oflags |= DB_OSO_RDONLY;
+	if ((ret = __os_open(env, path, 0, oflags, 0, fhpp)) != 0) {
+		/*
+		 * In replication it is possible to try to read a blob file
+		 * that has been deleted.  Callers pass printerr != 1 then.
+		 */
+		if (printerr == 1) {
+			__db_errx(env, DB_STR_A("0232",
+			    "Error opening blob file: %s.", "%s"), path);
+		}
+		goto err;
+	}
+
+err:	if (path != NULL)
+		__os_free(env, path);
+	if (ppath != NULL)
+		__os_free(env, ppath);
+	return (ret);
+}
+
+/*
+ * __blob_file_read --
+ *	Read up to size bytes at offset; dbt->size gets the bytes read.
+ * PUBLIC: int __blob_file_read
+ * PUBLIC:	__P((ENV *, DB_FH *, DBT *, off_t, u_int32_t));
+ */
+int
+__blob_file_read(env, fhp, dbt, offset, size)
+	ENV *env;
+	DB_FH *fhp;
+	DBT *dbt;
+	off_t offset;
+	u_int32_t size;
+{
+	int ret;
+	size_t bytes;
+	void *buf;
+
+	bytes = 0;
+	buf = NULL;
+
+	if ((ret = __os_seek(env, fhp, 0, 0, offset)) != 0)
+		goto err;
+
+	if (F_ISSET(dbt, DB_DBT_USERCOPY)) {
+		if ((ret = __os_malloc(env, size, &buf)) != 0)
+			goto err;
+	} else
+		buf = dbt->data;
+
+	if ((ret = __os_read(env, fhp, buf, size, &bytes)) != 0) {
+		__db_errx(env, DB_STR("0233", "Error reading blob file."));
+		goto err;
+	}
+	/*
+	 * It is okay to read off the end of the file, in which case fewer
+	 * bytes will be returned than requested.  This is also how the code
+	 * behaves in the DB_DBT_PARTIAL API.
+	 */
+	dbt->size = (u_int32_t)bytes;
+
+	if (F_ISSET(dbt, DB_DBT_USERCOPY)  && dbt->size != 0) {
+		ret = env->dbt_usercopy(
+		    dbt, 0, buf, dbt->size, DB_USERCOPY_SETDATA);
+	}
+
+err:	if (buf != NULL && buf != dbt->data)
+		__os_free(env, buf);
+	return (ret);
+}
+
+/*
+ * __blob_file_write --
+ *	Write buf at offset, growing *file_size if the file is extended.
+ * PUBLIC: int __blob_file_write
+ * PUBLIC: __P((DBC *, DB_FH *, DBT *,
+ * PUBLIC:    off_t, db_seq_t, off_t *, u_int32_t));
+ */
+int
+__blob_file_write(dbc, fhp, buf, offset, blob_id, file_size, flags)
+	DBC *dbc;
+	DB_FH *fhp;
+	DBT *buf;
+	off_t offset;
+	db_seq_t blob_id;
+	off_t *file_size;
+	u_int32_t flags;
+{
+	ENV *env;
+	off_t size, write_offset;
+	char *dirname, *name;
+	int ret, blob_lg;
+	size_t data_size;
+	void *ptr;
+
+	env = dbc->env;
+	dirname = name = NULL;
+	size = 0;
+	write_offset = offset;
+	DB_ASSERT(env, !DB_IS_READONLY(dbc->dbp));
+	DB_ASSERT(env, fhp != NULL);
+
+	/* The size on entry tells whether the write extends the file. */
+	size = *file_size;
+
+	if (DBENV_LOGGING(env)) {
+		if ((ret = __log_get_config(
+		    env->dbenv, DB_LOG_BLOB, &blob_lg)) != 0)
+			goto err;
+		if (blob_lg == 0 && !REP_ON(env))
+			LF_SET(DB_FOP_PARTIAL_LOG);
+		if (!LF_ISSET(DB_FOP_CREATE) && (size <= offset))
+			LF_SET(DB_FOP_APPEND);
+	}
+
+	if ((ret = __blob_id_to_path(
+	    env, dbc->dbp->blob_sub_dir, blob_id, &name)) != 0)
+		goto err;
+
+	if ((ret = __dbt_usercopy(env, buf)) != 0)
+		goto err;
+
+	/*
+	 * If the write overwrites some of the file, and writes off the end
+	 * of the file, break the write into two writes, one that overwrites
+	 * data, and an append.  Otherwise if the write is aborted, the
+	 * data written past the end of the file will not be erased.
+	 */
+	if (offset < size && (offset + buf->size) > size) {
+		ptr = buf->data;
+		data_size = (size_t)(size - offset);
+		if ((ret = __fop_write_file(env, dbc->txn, name, dirname,
+		    DB_APP_BLOB, fhp, offset, ptr, data_size, flags)) != 0) {
+			__db_errx(env, DB_STR_A("0235",
+			    "Error writing blob file: %s.", "%s"), name);
+				goto err;
+		}
+		LF_SET(DB_FOP_APPEND);
+		ptr = (u_int8_t *)ptr + data_size;
+		data_size = buf->size - data_size;
+		write_offset = size;
+	} else {
+		if (!LF_ISSET(DB_FOP_CREATE) && (offset >= size))
+			LF_SET(DB_FOP_APPEND);
+		ptr = buf->data;
+		data_size = buf->size;
+	}
+
+	if ((ret = __fop_write_file(env, dbc->txn, name, dirname,
+	    DB_APP_BLOB, fhp, write_offset, ptr, data_size, flags)) != 0) {
+		__db_errx(env, DB_STR_A("0236",
+		    "Error writing blob file: %s.", "%s"), name);
+		goto err;
+	}
+
+	if (LF_ISSET(DB_FOP_SYNC_WRITE))
+		if ((ret = __os_fsync(env, fhp)) != 0)
+			goto err;
+
+	/* Report the new size to the caller only if the file grew. */
+	if ((offset + (off_t)buf->size) > size)
+		*file_size = offset + (off_t)buf->size;
+
+err:	if (name != NULL)
+		__os_free(env, name);
+
+	return (ret);
+}
diff --git a/src/blob/blob_page.c b/src/blob/blob_page.c
new file mode 100644
index 00000000..96a2b59b
--- /dev/null
+++ b/src/blob/blob_page.c
@@ -0,0 +1,374 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2013, 2015 Oracle and/or its affiliates.  All rights reserved.
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/fop.h"
+
+/*
+ * Blob file data item code.
+ *
+ * Blob file data entries are stored on linked lists of pages.  The initial
+ * reference is a structure with an encoded version of the path where the file
+ * is stored. The blob file contains only the user's data.
+ */
+
+/*
+ * __blob_bulk --
+ *	Dump blob file into buffer.
+ *	The space requirements have already been checked; if the blob is
+ *	larger than UINT32_MAX then DB_BUFFER_SMALL would have already
+ *	been returned.
+ * PUBLIC: int __blob_bulk
+ * PUBLIC:    __P((DBC *, u_int32_t, db_seq_t, u_int8_t *));
+ */
+int
+__blob_bulk(dbc, len, blob_id, dp)
+	DBC *dbc;
+	u_int32_t len;
+	db_seq_t blob_id;
+	u_int8_t *dp;
+{
+	DBT dbt;
+	DB_FH *fhp;
+	ENV *env;
+	int ret, t_ret;
+
+	env = dbc->dbp->env;
+	fhp = NULL;
+	memset(&dbt, 0, sizeof(dbt));
+	F_SET(&dbt, DB_DBT_USERMEM);
+	dbt.ulen = len;
+	dbt.data = (void *)dp;
+
+	if ((ret = __blob_file_open(
+	    dbc->dbp, &fhp, blob_id, DB_FOP_READONLY, 1)) != 0)
+		goto err;
+
+	if ((ret = __blob_file_read(env, fhp, &dbt, 0, len)) != 0)
+		goto err;
+
+	/* Close the file; keep the first error if there was one. */
+err:	if (fhp != NULL) {
+		t_ret = __blob_file_close(dbc, fhp, 0);
+		if (ret == 0)
+			ret = t_ret;
+	}
+	return (ret);
+}
+
+/*
+ * __blob_get --
+ *	Get a blob file item. Analogous to db_overflow.c:__db_goff.
+ *
+ * PUBLIC: int __blob_get __P((DBC *,
+ * PUBLIC:     DBT *, db_seq_t, off_t, void **, u_int32_t *));
+ */
+int
+__blob_get(dbc, dbt, blob_id, file_size, bpp, bpsz)
+	DBC *dbc;
+	DBT *dbt;
+	db_seq_t blob_id;
+	off_t file_size;
+	void **bpp;
+	u_int32_t *bpsz;
+{
+	DB_FH *fhp;
+	ENV *env;
+	int ret, t_ret;
+	u_int32_t needed, start, tlen;
+
+	env = dbc->dbp->env;
+	fhp = NULL;
+	ret = 0;
+
+	/*
+	 * Blobs over UINT32_MAX need DB_STREAM or DB_DBT_PARTIAL; without
+	 * DB_DBT_PARTIAL, fail with DB_BUFFER_SMALL and size UINT32_MAX.
+	 */
+	if (file_size > UINT32_MAX) {
+		if (!F_ISSET(dbt, DB_DBT_PARTIAL)) {
+			dbt->size = UINT32_MAX;
+			ret = DB_BUFFER_SMALL;
+			goto err;
+		} else
+			tlen = UINT32_MAX;
+	} else
+		tlen = (u_int32_t)file_size;
+
+	if (((ret = __db_alloc_dbt(
+	    env, dbt, tlen, &needed, &start, bpp, bpsz)) != 0) || needed == 0)
+		goto err;
+	dbt->size = needed;
+
+	if ((ret = __blob_file_open(
+	    dbc->dbp, &fhp, blob_id, DB_FOP_READONLY, 1)) != 0)
+		goto err;
+
+	if ((ret = __blob_file_read(env, fhp, dbt, dbt->doff, needed)) != 0)
+		goto err;
+
+	/* Close the file; keep the first error if there was one. */
+err:	if (fhp != NULL) {
+		t_ret = __blob_file_close(dbc, fhp, 0);
+		if (ret == 0)
+			ret = t_ret;
+	}
+	/* TODO: Does the dbt need to be cleaned on error? */
+	return (ret);
+}
+
+/*
+ * __blob_put --
+ *	Create a new blob file and write dbt into it; sets *blob_id.
+ *
+ * PUBLIC: int __blob_put __P((
+ * PUBLIC:    DBC *, DBT *, db_seq_t *, off_t *size, DB_LSN *));
+ */
+int
+__blob_put(dbc, dbt, blob_id, size, plsn)
+	DBC *dbc;
+	DBT *dbt;
+	db_seq_t *blob_id;
+	off_t *size;
+	DB_LSN *plsn;
+{
+	DBT partial;
+	DB_FH *fhp;
+	ENV *env;
+	int ret, t_ret;
+	off_t offset;
+
+	env = dbc->dbp->env;
+	fhp = NULL;
+	offset = 0;
+	DB_ASSERT(env, blob_id != NULL);
+	DB_ASSERT(env, *blob_id == 0);
+
+	ZERO_LSN(*plsn);
+
+	/* Always create a new blob file; this generates *blob_id. */
+	if ((ret = __blob_file_create(dbc, &fhp, blob_id)) != 0)
+		goto err;
+
+	/*
+	 * If doing a partial put with dbt->doff == 0, then treat like
+	 * a normal put.  Otherwise write NULLs into the file up to doff, which
+	 * is required by the PARTIAL API.  Since the file is being created,
+	 * its size is always 0.
+	 */
+	DB_ASSERT(env, *size == 0);
+	if (F_ISSET(dbt, DB_DBT_PARTIAL) && dbt->doff > 0) {
+		memset(&partial, 0, sizeof(partial));
+		if ((ret = __os_malloc(env, dbt->doff, &partial.data)) != 0)
+			goto err;
+		memset(partial.data, 0, dbt->doff);
+		partial.size = dbt->doff;
+		ret = __blob_file_write(
+		    dbc, fhp, &partial, 0, *blob_id, size, DB_FOP_CREATE);
+		offset = dbt->doff;
+		__os_free(env, partial.data);
+		if (ret != 0)
+			goto err;
+	}
+
+	if ((ret = __blob_file_write(
+	    dbc, fhp, dbt, offset, *blob_id, size, DB_FOP_CREATE)) != 0)
+		goto err;
+
+	/* Sync and close the file; keep the first error if there was one. */
+err:	if (fhp != NULL) {
+		t_ret = __blob_file_close(dbc, fhp, DB_FOP_WRITE);
+		if (ret == 0)
+			ret = t_ret;
+	}
+	return (ret);
+}
+
+/*
+ * __blob_repl --
+ *	Replace a blob file contents.  It would be nice if this could be done
+ *	by truncating the file and writing in the new data, but undoing a
+ *	truncate would require a lot of logging, so it is performed by
+ *	deleting the old blob file, and creating a new one.
+ *
+ * PUBLIC: int __blob_repl __P((DBC *, DBT *, db_seq_t, db_seq_t *,off_t *));
+ */
+int
+__blob_repl(dbc, nval, blob_id, new_blob_id, size)
+	DBC *dbc;
+	DBT *nval;
+	db_seq_t blob_id;
+	db_seq_t *new_blob_id;
+	off_t *size;
+{
+	DBT partial;
+	DB_FH *fhp, *new_fhp;
+	DB_LSN lsn;
+	ENV *env;
+	int ret, t_ret;
+	off_t current, old_size;
+
+	fhp = new_fhp = NULL;
+	*new_blob_id = 0;
+	old_size = *size;
+	env = dbc->env;
+	memset(&partial, 0, sizeof(partial));
+
+	/*
+	 * Handling partial replace.
+	 * 1. doff > blob file size: Pad the end of the blob file with NULLs
+	 *	up to doff, then append the data.
+	 * 2. doff == blob file size: Append the data to the existing blob file.
+	 * 3. dlen == nval->size: Overwrite the data in the existing blob file.
+	 * 4. Otherwise create a new blob file.  Copy old blob data up to
+	 *	doff to the new file.  Append the new data.  Append data
+	 *	from the old file from doff + dlen to the end of the
+	 *	old file to the new file.  Delete the old file.
+	 */
+	if (F_ISSET(nval, DB_DBT_PARTIAL)) {
+		if ((nval->doff > *size) ||
+		    ((nval->doff == *size) || (nval->dlen == nval->size))) {
+			/* Cases 1-3: reuse the existing file and its id. */
+			if ((ret = __blob_file_open(
+			    dbc->dbp, &fhp, blob_id, 0, 1)) != 0)
+				goto err;
+			*new_blob_id = blob_id;
+
+			/* Pad the end of the blob with NULLs. */
+			if (nval->doff > *size) {
+				partial.size = nval->doff - (u_int32_t)*size;
+				if ((ret = __os_malloc(
+				    env, partial.size, &partial.data)) != 0)
+					goto err;
+				memset(partial.data, 0, partial.size);
+				if ((ret = __blob_file_write(dbc, fhp,
+				    &partial, *size, blob_id, size, 0)) != 0)
+					goto err;
+			}
+
+			/* Write in the data. */
+			if ((ret = __blob_file_write(dbc, fhp,
+			    nval, nval->doff, blob_id, size, 0)) != 0)
+				goto err;
+
+			/* Close the file */
+			ret = __blob_file_close(dbc, fhp, DB_FOP_WRITE);
+			fhp = NULL;
+			if (ret != 0)
+				goto err;
+		} else {
+			/* Open the old blob file. */
+			if ((ret = __blob_file_open(
+			    dbc->dbp, &fhp, blob_id, DB_FOP_READONLY, 1)) != 0)
+				goto err;
+			/* Create the new blob file. */
+			if ((ret = __blob_file_create(
+			    dbc, &new_fhp, new_blob_id)) != 0)
+				goto err;
+
+			*size = 0;
+			/* Copy data to the new file up to doff. */
+			if (nval->doff != 0) {
+				partial.ulen = partial.size = nval->doff;
+				if ((ret = __os_malloc(
+				    env, partial.ulen, &partial.data)) != 0)
+					goto err;
+				if ((ret = __blob_file_read(
+				    env, fhp, &partial, 0, partial.size)) != 0)
+					goto err;
+				if ((ret = __blob_file_write(
+				    dbc, new_fhp, &partial, 0,
+				    *new_blob_id, size, DB_FOP_CREATE)) != 0)
+					goto err;
+			}
+
+			/* Write the partial data into the new file. */
+			if ((ret = __blob_file_write(
+			    dbc, new_fhp, nval, nval->doff,
+			    *new_blob_id, size, DB_FOP_CREATE)) != 0)
+				goto err;
+
+			/* Copy remaining blob data into the new file. */
+			current = nval->doff + nval->dlen;
+			while (current < old_size) {
+				if (partial.ulen < MEGABYTE) {
+					if ((ret = __os_realloc(env,
+					    MEGABYTE, &partial.data)) != 0)
+						goto err;
+					partial.size = partial.ulen = MEGABYTE;
+				}
+				if ((old_size - current) < partial.ulen) {
+					partial.size =
+					(u_int32_t)(old_size - current);
+				} else
+					partial.size = MEGABYTE;
+
+				if ((ret = __blob_file_read(env, fhp,
+				    &partial, current, partial.size)) != 0)
+					goto err;
+				if ((ret = __blob_file_write(
+				    dbc, new_fhp, &partial, *size,
+				    *new_blob_id, size, DB_FOP_CREATE)) != 0)
+					goto err;
+				current += partial.size;
+			}
+
+			/* Close the old file. */
+			ret = __blob_file_close(dbc, fhp, 0);
+			fhp = NULL;
+			if (ret != 0)
+				goto err;
+
+			/* Delete the old blob file. */
+			if ((ret = __blob_del(dbc, blob_id)) != 0)
+				goto err;
+		}
+		goto err;
+	}
+
+	if ((ret = __blob_del(dbc, blob_id)) != 0)
+		goto err;
+
+	*size = 0;
+	if ((ret = __blob_put(dbc, nval, new_blob_id, size, &lsn)) != 0)
+		goto err;
+
+err:	if (fhp != NULL) {
+		t_ret = __blob_file_close(dbc, fhp, DB_FOP_WRITE);
+		if (ret == 0)
+			ret = t_ret;
+	}
+	if (new_fhp != NULL) {
+		t_ret = __blob_file_close(dbc, new_fhp, DB_FOP_WRITE);
+		if (ret == 0)
+			ret = t_ret;
+	}
+	if (partial.data != NULL)
+		__os_free(env, partial.data);
+	return (ret);
+}
+
+/*
+ * __blob_del --
+ *	Delete a blob file. The on-page record is handled separately.
+ *
+ * PUBLIC: int __blob_del __P((DBC *, db_seq_t));
+ */
+int
+__blob_del(dbc, blob_id)
+	DBC *dbc;
+	db_seq_t blob_id;
+{
+	int ret;
+
+	ret = __blob_file_delete(dbc, blob_id);
+
+	return (ret);
+}
diff --git a/src/blob/blob_stream.c b/src/blob/blob_stream.c
new file mode 100644
index 00000000..ab21aa0f
--- /dev/null
+++ b/src/blob/blob_stream.c
@@ -0,0 +1,283 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2013, 2015 Oracle and/or its affiliates.  All rights reserved.
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/fop.h"
+
+static int __db_stream_close __P((DB_STREAM *, u_int32_t));
+static int __db_stream_read
+    __P((DB_STREAM *, DBT *, db_off_t, u_int32_t, u_int32_t));
+static int __db_stream_size __P((DB_STREAM *, db_off_t *, u_int32_t));
+static int __db_stream_write __P((DB_STREAM *, DBT *, db_off_t, u_int32_t));
+
+/*
+ * __db_stream_init
+ *	Create a DB_STREAM on the blob that the cursor dbc points to.
+ *
+ * PUBLIC: int __db_stream_init __P((DBC *, DB_STREAM **, u_int32_t));
+ */
+int
+__db_stream_init(dbc, dbsp, flags)
+	DBC *dbc;
+	DB_STREAM **dbsp;
+	u_int32_t flags;
+{
+	DB_STREAM *dbs;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int ret;
+	off_t size;
+
+	dbs = NULL;
+	env = dbc->env;
+
+	if ((ret = __os_malloc(env, sizeof(DB_STREAM), &dbs)) != 0)
+		return (ret);
+	memset(dbs, 0, sizeof(DB_STREAM));
+
+	ENV_ENTER(env, ip);
+	/* Should the copy be transient? */
+	if ((ret = __dbc_idup(dbc, &dbs->dbc, DB_POSITION)) != 0)
+		goto err;
+	dbs->flags = flags;
+
+	/*
+	 * Request a write lock on the db record if writing to the blob.
+	 * NOTE(review): set on the caller's dbc, after the dup -- confirm.
+	 */
+	if (F_ISSET(dbs, DB_FOP_WRITE))
+		F_SET(dbc, DBC_RMW);
+
+	if ((ret = __dbc_get_blob_id(dbs->dbc, &dbs->blob_id)) != 0) {
+		if (ret == EINVAL)
+			__db_errx(env, DB_STR("0211",
+			    "Error, cursor does not point to a blob."));
+		goto err;
+	}
+
+	if ((ret = __dbc_get_blob_size(dbs->dbc, &size)) != 0)
+		goto err;
+	dbs->file_size = size;
+
+	if ((ret = __blob_file_open(
+	    dbs->dbc->dbp, &dbs->fhp, dbs->blob_id, flags, 1)) != 0)
+		goto err;
+	ENV_LEAVE(env, ip);
+
+	dbs->close = __db_stream_close;
+	dbs->read = __db_stream_read;
+	dbs->size = __db_stream_size;
+	dbs->write = __db_stream_write;
+
+	*dbsp = dbs;
+	return (0);
+
+err:	if (dbs != NULL && dbs->dbc != NULL)
+		(void)__dbc_close(dbs->dbc);
+	ENV_LEAVE(env, ip);
+	if (dbs != NULL)
+		__os_free(env, dbs);
+	return (ret);
+}
+
+/*
+ * __db_stream_close --
+ *	Check that no flags are set, then close and free the stream.
+ * DB_STREAM->close
+ */
+static int
+__db_stream_close(dbs, flags)
+	DB_STREAM *dbs;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int ret;
+
+	env = dbs->dbc->env;
+
+	if ((ret = __db_fchk(env, "DB_STREAM->close", flags, 0)) != 0)
+		return (ret);
+
+	ENV_ENTER(env, ip);
+
+	ret = __db_stream_close_int(dbs);
+
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * __db_stream_close_int --
+ *	Close the blob file and the cursor, then free the DB_STREAM.
+ *
+ * PUBLIC: int __db_stream_close_int __P ((DB_STREAM *));
+ */
+int
+__db_stream_close_int(dbs)
+	DB_STREAM *dbs;
+{
+	DBC *dbc;
+	ENV *env;
+	int ret, t_ret;
+
+	dbc = dbs->dbc;
+	env = dbc->env;
+
+	ret = __blob_file_close(dbc, dbs->fhp, dbs->flags);
+
+	if ((t_ret = __dbc_close(dbs->dbc)) != 0 && ret == 0)
+		ret = t_ret;
+
+	__os_free(env, dbs);
+
+	return (ret);
+}
+
+/*
+ * __db_stream_read --
+ *	Read up to size bytes at offset into data; rejects DB_DBT_PARTIAL.
+ * DB_STREAM->read
+ */
+static int
+__db_stream_read(dbs, data, offset, size, flags)
+	DB_STREAM *dbs;
+	DBT *data;
+	db_off_t offset;
+	u_int32_t size;
+	u_int32_t flags;
+{
+	DBC *dbc;
+	ENV *env;
+	int ret;
+	u_int32_t needed, start;
+
+	dbc = dbs->dbc;
+	env = dbc->dbp->env;
+	ret = 0;
+
+	if ((ret = __db_fchk(env, "DB_STREAM->read", flags, 0)) != 0)
+		return (ret);
+
+	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+		ret = EINVAL;
+		__db_errx(env, DB_STR("0212",
+		    "Error, do not use DB_DBT_PARTIAL with DB_STREAM."));
+		goto err;
+	}
+
+	if (offset > dbs->file_size) {
+		data->size = 0;
+		goto err;
+	}
+
+	if ((ret = __db_alloc_dbt(
+	    env, data, size, &needed, &start, NULL, NULL)) != 0)
+		goto err;
+	data->size = needed;
+
+	if (needed == 0)
+		goto err;
+
+	ret = __blob_file_read(env, dbs->fhp, data, offset, size);
+
+err:	return (ret);
+}
+
+/*
+ * __db_stream_size --
+ *	Report the blob size recorded in the stream handle.
+ * DB_STREAM->size
+ */
+static int
+__db_stream_size(dbs, size, flags)
+	DB_STREAM *dbs;
+	db_off_t *size;
+	u_int32_t flags;
+{
+	int ret;
+
+	if ((ret = __db_fchk(dbs->dbc->env, "DB_STREAM->size", flags, 0)) != 0)
+		return (ret);
+
+	*size = dbs->file_size;
+
+	return (0);
+}
+
+/*
+ * __db_stream_write --
+ *	Write data at offset; store the new blob size if it changed.
+ * DB_STREAM->write
+ */
+static int
+__db_stream_write(dbs, data, offset, flags)
+	DB_STREAM *dbs;
+	DBT *data;
+	db_off_t offset;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int ret;
+	off_t file_size;
+	u_int32_t wflags;
+
+	env = dbs->dbc->env;
+
+	if ((ret = __db_fchk(
+	    env, "DB_STREAM->write", flags, DB_STREAM_SYNC_WRITE)) != 0)
+		return (ret);
+
+	if (F_ISSET(dbs, DB_FOP_READONLY)) {
+		ret = EINVAL;
+		__db_errx(env, DB_STR("0213", "Error, blob is read only."));
+		return (ret);
+	}
+	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+		ret = EINVAL;
+		__db_errx(env, DB_STR("0214",
+		    "Error, do not use DB_DBT_PARTIAL with DB_STREAM."));
+		return (ret);
+	}
+	if (offset < 0 ) {
+		ret = EINVAL;
+		__db_errx(env, DB_STR_A("0215",
+		    "Error, invalid offset value: %lld", "%lld"),
+		    (long long)offset);
+		return (ret);
+	}
+	/* Catch overflow.  NOTE(review): relies on signed wrap -- confirm. */
+	if (offset + (db_off_t)data->size < offset) {
+		ret = EINVAL;
+		__db_errx(env, DB_STR_A("0216",
+	"Error, this write will exceed the maximum blob size: %lu %lld",
+		"%lu %lld"), (u_long)data->size, (long long)offset);
+		return (ret);
+	}
+
+	ENV_ENTER(env, ip);
+	wflags = dbs->flags;
+	if (LF_ISSET(DB_STREAM_SYNC_WRITE))
+		wflags |= DB_FOP_SYNC_WRITE;
+	file_size = dbs->file_size;
+	if ((ret = __blob_file_write(dbs->dbc, dbs->fhp,
+	    data, offset, dbs->blob_id, &file_size, wflags)) != 0)
+		goto err;
+	if (file_size != dbs->file_size) {
+		dbs->file_size = file_size;
+		if ((ret = __dbc_set_blob_size(dbs->dbc, dbs->file_size)) != 0)
+			goto err;
+	}
+err:	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
diff --git a/src/blob/blob_util.c b/src/blob/blob_util.c
new file mode 100644
index 00000000..b2e3474b
--- /dev/null
+++ b/src/blob/blob_util.c
@@ -0,0 +1,1189 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2013, 2015 Oracle and/or its affiliates.  All rights reserved.
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_verify.h"
+#include "dbinc/db_am.h"
+#include "dbinc/blob.h"
+#include "dbinc/fop.h"
+#include "dbinc/txn.h"
+#include "dbinc_auto/sequence_ext.h"
+
+static int __blob_open_meta_db __P((
+    DB *, DB_TXN *, DB **, DB_SEQUENCE **, int, int));
+static int __blob_clean_dir
+    __P((ENV *, DB_TXN *, const char *, const char *, int));
+static int __blob_copy_dir __P((DB *, const char *, const char *));
+
+#define	BLOB_ID_KEY "blob_id"
+#define	BLOB_SEQ_DB_NAME "blob_id_seq"
+#define	BLOB_DIR_ID_KEY "blob_dir_id"
+#define	BLOB_DIR_SEQ_DB_NAME "blob_dir_id_seq"
+
+/*
+ * __blob_make_sub_dir --
+ *	Build "<prefix><file_id>/[<prefix><db_id>/]", the subdirectory of
+ *	the blob directory for the given file and subdatabase ids.
+ *
+ * PUBLIC: int __blob_make_sub_dir __P((ENV *, char **, db_seq_t, db_seq_t));
+ */
+int
+__blob_make_sub_dir(env, blob_sub_dir, file_id, db_id)
+	ENV *env;
+	char **blob_sub_dir;
+	db_seq_t file_id;
+	db_seq_t db_id;
+{
+	char fname[MAX_BLOB_PATH_SZ], dname[MAX_BLOB_PATH_SZ];
+	int ret;
+	size_t len;
+
+	*blob_sub_dir = NULL;
+	memset(fname, 0, MAX_BLOB_PATH_SZ);
+	memset(dname, 0, MAX_BLOB_PATH_SZ);
+
+	if (db_id == 0 && file_id == 0)
+		return (0);
+
+	if (db_id < 0 || file_id < 0)
+		return (EINVAL);
+
+	/* The master db has no subdb id, so dname is only built for subdbs. */
+	if (db_id != 0)
+		(void)snprintf(dname, MAX_BLOB_PATH_SZ,
+		    "%s%llu", BLOB_DIR_PREFIX, (unsigned long long)db_id);
+	(void)snprintf(fname, MAX_BLOB_PATH_SZ, "%s%llu",
+	    BLOB_DIR_PREFIX, (unsigned long long)file_id);
+
+	len = strlen(fname) + (db_id ? strlen(dname) : 0) + 3;
+	if ((ret = __os_malloc(env, len, blob_sub_dir)) != 0)
+		goto err;
+	if (db_id != 0)
+		(void)sprintf(*blob_sub_dir, "%s%c%s%c", fname,
+		    PATH_SEPARATOR[0], dname, PATH_SEPARATOR[0]);
+	else
+		(void)sprintf(*blob_sub_dir, "%s%c", fname, PATH_SEPARATOR[0]);
+
+	return (0);
+
+err:	if (*blob_sub_dir != NULL)
+		__os_free(env, *blob_sub_dir);
+
+	return (ret);
+}
+
+/*
+ * __blob_make_meta_fname --
+ *	Construct a (usually partial) path name of a blob metadata file.
+ *	It usually is relative to the environment home directory; only when a
+ *	blob directory is configured and is an absolute path does this make a
+ *	full path.
+ *
+ *	When dbp is set it constructs the blob metadata filename for that db;
+ *	otherwise it constructs the environment-wide directory id filename.
+ *
+ * PUBLIC: int __blob_make_meta_fname __P((ENV *, DB *, char **));
+ */
+int
+__blob_make_meta_fname(env, dbp, meta_fname)
+	ENV *env;
+	DB *dbp;
+	char **meta_fname;
+{
+	char *fname, *sub_dir;
+	int ret;
+	size_t len;
+
+	fname = NULL;
+	len = strlen(BLOB_META_FILE_NAME) + 1;
+	if (dbp == NULL) {
+		sub_dir = "";
+	} else {
+		sub_dir = dbp->blob_sub_dir;
+		DB_ASSERT(env, sub_dir != NULL);
+		len += strlen(sub_dir);
+	}
+	if ((ret = __os_malloc(env, len, &fname)) != 0)
+		goto err;
+
+	snprintf(fname, len, "%s%s", sub_dir, BLOB_META_FILE_NAME);
+	*meta_fname = fname;
+	return (0);
+err:
+	if (fname != NULL)
+		__os_free(env, fname);
+	return (ret);
+}
+
+/*
+ * __blob_get_dir --
+ *	Get the root directory of this database's blob files.  *dirp is
+ *	left NULL when the database has no blob sub-directory.
+ * PUBLIC: int __blob_get_dir __P((DB *, char **));
+ */
+int
+__blob_get_dir(dbp, dirp)
+	DB *dbp;
+	char **dirp;
+{
+	char *blob_dir;
+	int ret;
+
+	*dirp = blob_dir = NULL;	/* The err: path tests blob_dir. */
+
+	if (dbp->blob_sub_dir == NULL)
+		return (0);
+
+	/* Get the path of the blob directory for this database. */
+	if ((ret = __db_appname(dbp->env,
+	    DB_APP_BLOB, dbp->blob_sub_dir, NULL, &blob_dir)) != 0)
+		goto err;
+
+	*dirp = blob_dir;
+	return (0);
+
+err:	if (blob_dir != NULL)
+		__os_free(dbp->env, blob_dir);
+
+	return (ret);
+}
+
+/*
+ * __blob_open_meta_db --
+ *	Open or create a blob meta database. This can be either
+ *	the environment-wide db used to generate blob directory ids (__db1), or
+ *	the per-db db used to generate blob ids (__db.bl001).
+ */
+static int
+__blob_open_meta_db(dbp, txn, meta_db, seq, file, create)
+	DB *dbp;
+	DB_TXN *txn;
+	DB **meta_db;
+	DB_SEQUENCE **seq;
+	int file;
+	int create;
+{
+#ifdef HAVE_64BIT_TYPES
+	ENV *env;
+	DB *blob_meta_db;
+	DBT key;
+	DB_SEQUENCE *blob_seq;
+	DB_THREAD_INFO *ip;
+	DB_TXN *local_txn;
+	char *fullname, *fname, *dname, *path;
+	int free_paths, ret, use_txn;
+	u_int32_t flags;
+
+	flags = 0;
+	fullname = fname = NULL;
+	blob_meta_db = NULL;
+	blob_seq = NULL;
+	local_txn = NULL;
+	env = dbp->env;
+	free_paths = use_txn = 0;
+	memset(&key, 0, sizeof(DBT));
+
+	/*
+	 * Get the directory of the database, the meta db file name,
+	 * and the sub-db name.
+	 * file: blob directory/meta-file-name
+	 * else: blob directory/per-db-blobdir/meta-file-name
+	 */
+	if (file) {
+		key.data = BLOB_DIR_ID_KEY;
+		key.size = (u_int32_t)strlen(BLOB_DIR_ID_KEY);
+		dname = BLOB_DIR_SEQ_DB_NAME;
+		fname = BLOB_META_FILE_NAME;
+	} else {
+		key.data = BLOB_ID_KEY;
+		key.size = (u_int32_t)strlen(BLOB_ID_KEY);
+		dname = BLOB_SEQ_DB_NAME;
+		if ((ret = __blob_make_meta_fname(env,
+		    dbp, &fname)) != 0)
+			goto err;
+		free_paths = 1;
+		if (dbp->open_flags & DB_THREAD)
+			LF_SET(DB_THREAD);
+	}
+
+	if ((ret = __db_appname(env, DB_APP_BLOB, fname, NULL, &fullname)) != 0)
+		goto err;
+
+	path = fullname;
+#ifdef DB_WIN32
+	/*
+	 * Absolute paths on windows can result in it creating a "C" or "D"
+	 * directory in the working directory.
+	 */
+	if (__os_abspath(path))
+		path += 2;
+#endif
+	/*
+	 * Create the blob, database file, and database name directories. The
+	 * mkdir isn't logged, so __fop_create_recover needs to do this as well.
+	 */
+	if (__os_exists(env, fullname, NULL) != 0) {
+		if (!create) {
+			ret = ENOENT;
+			goto err;
+		} else if ((ret = __db_mkpath(env, path)) != 0)
+			goto err;
+	}
+
+	if ((ret = __db_create_internal(&blob_meta_db, env, 0)) != 0)
+		goto err;
+
+	if (create)
+		LF_SET(DB_CREATE);
+
+	/* Disable blobs in the blob meta databases themselves. */
+	if ((ret = __db_set_blob_threshold(blob_meta_db, 0, 0)) != 0)
+		goto err;
+
+	/*
+	 * To avoid concurrency issues, the blob meta database is
+	 * opened and operated on in a local transaction.  The one
+	 * exception is when the blob meta database is created in the
+	 * same txn as the parent db.  Then the blob meta database
+	 * shares the given txn, so if the txn is rolled back, the
+	 * creation of the blob meta database will also be rolled back.
+	 */
+	if (!file && IS_REAL_TXN(dbp->cur_txn))
+		use_txn = 1;
+
+	ENV_GET_THREAD_INFO(env, ip);
+	if (IS_REAL_TXN(txn)) {
+		if (use_txn)
+			local_txn = txn;
+		else {
+			if ((ret = __txn_begin(
+			    env, ip, NULL, &local_txn, DB_IGNORE_LEASE)) != 0)
+				goto err;
+		}
+	}
+	if ((ret = __db_open(blob_meta_db, ip, local_txn, fname, dname,
+	    DB_BTREE, flags | DB_INTERNAL_BLOB_DB, 0, PGNO_BASE_MD)) != 0)
+		goto err;
+
+	/* Open the sequence that holds the blob ids. */
+	if ((ret = db_sequence_create(&blob_seq, blob_meta_db, 0)) != 0)
+		goto err;
+
+	/* No-op if already initialized, 0 is an invalid value for blob ids. */
+	if ((ret = __seq_initial_value(blob_seq, 1)) != 0)
+		goto err;
+	if ((ret = __seq_open(blob_seq, local_txn, &key, flags)) != 0)
+		goto err;
+
+	if (local_txn != NULL && use_txn == 0 &&
+	    (ret = __txn_commit(local_txn, 0)) != 0) {
+		local_txn = NULL;
+		goto err;
+	}
+	__os_free(env, fullname);
+	if (free_paths)
+		__os_free(env, fname);
+	*meta_db = blob_meta_db;
+	*seq = blob_seq;
+	return (0);
+
+err:
+	if (fullname)
+		__os_free(env, fullname);
+	if (fname != NULL && free_paths)
+		__os_free(env, fname);
+	if (local_txn != NULL && use_txn == 0)
+		(void)__txn_abort(local_txn);
+	if (blob_seq != NULL)
+		(void)__seq_close(blob_seq, 0);
+	if (blob_meta_db != NULL)
+		(void)__db_close(blob_meta_db, NULL, 0);
+	return (ret);
+
+#else /*HAVE_64BIT_TYPES*/
+	__db_errx(dbp->env, DB_STR("0217",
+	    "library build did not include support for blobs"));
+	return (DB_OPNOTSUP);
+#endif
+}
+
+/*
+ * __blob_generate_dir_ids --
+ *
+ * Generate the unique id used to create a blob directory for the database.
+ * Only one argument is needed.  Files with one database only need the
+ * file id.  The master database only needs the file id, and
+ * subdatabases inherit the file id from the master, so they only need the
+ * subdatabase id.  A new id is drawn only when *id is 0 on entry.
+ *
+ * PUBLIC: int __blob_generate_dir_ids
+ * PUBLIC:	__P((DB *, DB_TXN *, db_seq_t *));
+ */
+int
+__blob_generate_dir_ids(dbp, txn, id)
+	DB *dbp;
+	DB_TXN *txn;
+	db_seq_t *id;
+{
+	DB *blob_meta_db;
+	DB_SEQUENCE *blob_seq;
+	int ret;
+	u_int32_t flags;
+
+#ifdef HAVE_64BIT_TYPES
+	flags = 0;
+	blob_meta_db = NULL;
+	blob_seq = NULL;
+
+	if ((ret = __blob_open_meta_db(
+	    dbp, txn, &blob_meta_db, &blob_seq, 1, 1)) != 0)
+		goto err;
+
+	if (IS_REAL_TXN(txn))
+		LF_SET(DB_AUTO_COMMIT | DB_TXN_NOSYNC);
+
+	DB_ASSERT(dbp->env, id != NULL);
+	if (*id == 0) {
+		if ((ret = __seq_get(blob_seq, 0, 1, id, flags)) != 0)
+			goto err;
+	}
+
+err:	if (blob_seq != NULL)
+		(void)__seq_close(blob_seq, 0);
+	if (blob_meta_db != NULL)
+		(void)__db_close(blob_meta_db, NULL, 0);
+	return (ret);
+#else /*HAVE_64BIT_TYPES*/
+	COMPQUIET(txn, NULL);	/* Not dbp: it is dereferenced below. */
+	COMPQUIET(id, NULL);
+	__db_errx(dbp->env, DB_STR("0218",
+	    "library build did not include support for blobs"));
+	return (DB_OPNOTSUP);
+#endif
+}
+
+/*
+ * __blob_generate_id --
+ * Generate a new blob ID, opening the blob meta database on first use.
+ *
+ * PUBLIC: int __blob_generate_id __P((DB *, DB_TXN *, db_seq_t *));
+ */
+int
+__blob_generate_id(dbp, txn, blob_id)
+	DB *dbp;
+	DB_TXN *txn;
+	db_seq_t *blob_id;
+{
+#ifdef HAVE_64BIT_TYPES
+	DB_TXN *seq_txn;
+	int ret;
+	u_int32_t flags;
+
+	/*
+	 * The meta database and its id sequence are opened lazily, the
+	 * first time this handle needs a blob id, and cached on the handle.
+	 */
+	if (dbp->blob_seq == NULL && (ret = __blob_open_meta_db(dbp, txn,
+	    &dbp->blob_meta_db, &dbp->blob_seq, 0, 1)) != 0)
+		return (ret);
+
+	/*
+	 * While the database's opening transaction is still active, draw the
+	 * id inside the caller's transaction rather than auto committing;
+	 * an auto commit could deadlock with the transaction that
+	 * __blob_open_meta_db used to open the blob meta database.
+	 */
+	seq_txn = IS_REAL_TXN(dbp->cur_txn) ? txn : NULL;
+
+	/* Otherwise a transactional caller gets an auto-committed id. */
+	flags = DB_IGNORE_LEASE;
+	if (seq_txn == NULL && IS_REAL_TXN(txn))
+		LF_SET(DB_AUTO_COMMIT | DB_TXN_NOSYNC);
+
+	ret = __seq_get(dbp->blob_seq, seq_txn, 1, blob_id, flags);
+	return (ret);
+#else /*HAVE_64BIT_TYPES*/
+	COMPQUIET(blob_id, NULL);
+	__db_errx(dbp->env, DB_STR("0219",
+	    "library build did not include support for blobs"));
+	return (DB_OPNOTSUP);
+#endif
+}
+
+/*
+ * __blob_highest_id
+ *
+ * Returns the highest id in the blob meta database; 0 if it does not exist.
+ *
+ * PUBLIC: int __blob_highest_id __P((DB *, DB_TXN *, db_seq_t *));
+ */
+int
+__blob_highest_id(dbp, txn, id)
+	DB *dbp;
+	DB_TXN *txn;
+	db_seq_t *id;
+{
+#ifdef HAVE_64BIT_TYPES
+	int ret;
+
+	*id = 0;
+	if (dbp->blob_sub_dir == NULL) {
+		if ((ret = __blob_make_sub_dir(dbp->env, &dbp->blob_sub_dir,
+		    dbp->blob_file_id, dbp->blob_sdb_id)) != 0)
+			goto err;
+	}
+	if (dbp->blob_seq == NULL) {
+		ret = __blob_open_meta_db(dbp, txn,
+		    &dbp->blob_meta_db, &dbp->blob_seq, 0, 0);
+		/*
+		 * It is not an error if the blob meta database does not
+		 * exist; *id stays 0 and the unopened sequence is not read.
+		 */
+		if (ret == ENOENT)
+			ret = 0;
+		if (ret != 0 || dbp->blob_seq == NULL)
+			goto err;
+	}
+
+	ret = __seq_get(dbp->blob_seq, txn, 0, id, DB_CURRENT);
+err:
+	return (ret);
+#else /*HAVE_64BIT_TYPES*/
+	COMPQUIET(id, NULL);
+	__db_errx(dbp->env, DB_STR("0245",
+	    "library build did not include support for blobs"));
+	return (DB_OPNOTSUP);
+#endif
+}
+
+/*
+ * __blob_calculate_dirs
+ *
+ * Append to path, at offset *len, the directories below the blob subdirectory
+ * that hold the file for blob_id; *len is advanced and *depth is set to the
+ * number of levels.  The caller must supply a large enough path buffer.
+ *
+ * PUBLIC: void __blob_calculate_dirs __P((db_seq_t, char *, int *, int *));
+ */
+void
+__blob_calculate_dirs(blob_id, path, len, depth)
+	db_seq_t blob_id;
+	char *path;
+	int *len;
+	int *depth;
+{
+	db_seq_t divisor, elem, rest;
+	int level;
+
+	/* Count the levels; divisor ends up selecting the topmost one. */
+	*depth = 0;
+	divisor = 1;
+	for (rest = blob_id / BLOB_DIR_ELEMS; rest != 0;
+	    rest /= BLOB_DIR_ELEMS, (*depth)++)
+		divisor *= BLOB_DIR_ELEMS;
+	for (level = *depth; level > 0; level--) {
+		elem = (blob_id / divisor) % BLOB_DIR_ELEMS;
+		divisor /= BLOB_DIR_ELEMS;
+		*len += sprintf(path + *len, "%03llu%c",
+		    (unsigned long long)elem, PATH_SEPARATOR[0]);
+	}
+}
+
+/*
+ * __blob_id_to_path --
+ * Generate the file name and blob specific part of the path for a particular
+ * blob_id; callers use __db_appname to turn it into a fully qualified path.
+ * Ids below 1 are rejected with EINVAL.  The caller must free the path.
+ *
+ * PUBLIC: int __blob_id_to_path __P((ENV *, const char *, db_seq_t, char **));
+ */
+int
+__blob_id_to_path(env, blob_sub_dir, blob_id, ppath)
+	ENV *env;
+	const char *blob_sub_dir;
+	db_seq_t blob_id;
+	char **ppath;
+{
+	char *path, *tmp_path;
+	int depth, name_len, ret;
+	size_t len;
+
+	name_len = 0;
+	path = tmp_path = *ppath = NULL;
+
+	if (blob_id < 1) {
+		ret = EINVAL;
+		goto err;
+	}
+
+	len = MAX_BLOB_PATH_SZ + strlen(blob_sub_dir) + 1;
+	if ((ret = __os_malloc(env, len, &path)) != 0)
+		goto err;
+
+	memset(path, 0, len);
+	name_len += sprintf(path, "%s", blob_sub_dir);
+
+	__blob_calculate_dirs(blob_id, path, &name_len, &depth);
+
+	/*
+	 * Populate the file name.  The id is zero padded to 3 digits for each
+	 * directory level plus 3 more, so the name width grows with the depth.
+	 */
+	(void)sprintf(path + name_len, "%s%0*llu",
+	    BLOB_FILE_PREFIX, (depth + 1) * 3, (unsigned long long)blob_id);
+
+	/* If this is the first file in the directory, ensure it exists. */
+	if (blob_id % BLOB_DIR_ELEMS == 0 && depth > 0) {
+		if ((ret = __db_appname(
+		    env, DB_APP_BLOB, path, NULL, &tmp_path)) != 0 )
+			goto err;
+		/* Any mkpath failure is reported to the caller as EINVAL. */
+		if ((ret = __db_mkpath(env, tmp_path)) != 0) {
+			__db_errx(env, DB_STR("0221",
+			    "Error creating blob directory."));
+			ret = EINVAL;
+			goto err;
+		}
+		__os_free(env, tmp_path);
+	}
+
+	*ppath = path;
+	return (0);
+
+err:
+	if (tmp_path != NULL)
+		__os_free(env, tmp_path);
+	if (path != NULL)
+		__os_free(env, path);
+
+	return (ret);
+}
+
+/*
+ * __blob_str_to_id
+ *
+ * Parse the decimal digits at *path into *id (0 if there are none) and leave
+ * *path at the first non-digit.  Returns EINVAL if the value would overflow.
+ *
+ * PUBLIC:  int __blob_str_to_id __P((ENV *, const char **, db_seq_t *));
+ */
+int
+__blob_str_to_id(env, path, id)
+	ENV *env;
+	const char **path;
+	db_seq_t *id;
+{
+	db_seq_t digit, max;
+	const char *p;
+
+	p = *path;
+	*id = 0;
+
+	/* Largest db_seq_t value, built without overflowing the type. */
+	max = (((db_seq_t)1 << (sizeof(db_seq_t) * 8 - 2)) - 1) * 2 + 1;
+	while (p[0] >= '0' && p[0] <= '9') {
+		digit = p[0] - '0';
+		/* Test first: signed overflow is undefined, not negative. */
+		if (*id > (max - digit) / 10) {
+			__db_errx(env, DB_STR("0246",
+			    "Blob id integer overflow."));
+			return (EINVAL);
+		}
+		*id = *id * 10 + digit;
+		p++;
+	}
+	*path = p;
+	return (0);
+}
+
+/*
+ * __blob_path_to_dir_ids --
+ * Get the file and subdatabase ids from a path to a blob file or a path in
+ * the blob directory structure.  Ids that are not present are returned as 0.
+ * Skips the subdatabase directory id if sdb_id is NULL.
+ *
+ * PUBLIC: int __blob_path_to_dir_ids
+ * PUBLIC:	__P((ENV *, const char *, db_seq_t *, db_seq_t *));
+ */
+int
+__blob_path_to_dir_ids(env, path, file_id, sdb_id)
+	ENV *env;
+	const char *path;
+	db_seq_t *file_id;
+	db_seq_t *sdb_id;
+{
+	int ret;
+	size_t len;
+	const char *p;
+
+	*file_id = 0;
+	if (sdb_id != NULL)
+		*sdb_id = 0;
+	ret = 0;
+	p = path;
+
+	/*
+	 * The blob file and subdatabase directories are of the form __db###:
+	 * find BLOB_DIR_PREFIX followed by a digit; 4 is presumably its length.
+	 */
+	len = strlen(path);
+	do {
+		p = strstr(p, BLOB_DIR_PREFIX);
+		if (p == NULL || p > (path + len + 4))
+			return (ret);
+		p += 4;
+	} while (p[0] < '0' || p[0] > '9');
+
+	/* The file id should be next in the path. */
+	if ((ret = __blob_str_to_id(env, &p, file_id)) != 0)
+		return (ret);
+
+	/* Quit now if a subdatabase argument was not passed. */
+	if (sdb_id == NULL)
+		return (ret);
+
+	p = strstr(p, BLOB_DIR_PREFIX);
+	/* It is okay for the path not to include a sdb_id. */
+	if (p == NULL || p > (path + 4 + len))
+		return (ret);
+
+	p += 4;
+	ret = __blob_str_to_id(env, &p, sdb_id);
+
+	return (ret);
+}
+
+/*
+ * __blob_salvage --
+ *
+ * Read "size" bytes at "offset" of a blob file into the DBT during salvage.
+ * Assumes dbt->data can hold "size" bytes; a short read returns EIO.
+ *
+ * PUBLIC: int __blob_salvage __P((ENV *, db_seq_t, off_t, size_t,
+ * PUBLIC:	db_seq_t, db_seq_t, DBT *));
+ */
+int
+__blob_salvage(env, blob_id, offset, size, file_id, sdb_id, dbt)
+	ENV *env;
+	db_seq_t blob_id;
+	off_t offset;
+	size_t size;
+	db_seq_t file_id;
+	db_seq_t sdb_id;
+	DBT *dbt;
+{
+	DB_FH *fhp;
+	char *blob_sub_dir, *dir, *path;
+	int ret;
+	size_t bytes;
+
+	blob_sub_dir = dir = path = NULL;
+	fhp = NULL;
+
+	if (file_id == 0 && sdb_id == 0) {
+		ret = ENOENT;
+		goto err;
+	}
+
+	if ((ret = __blob_make_sub_dir(
+	    env, &blob_sub_dir, file_id, sdb_id)) != 0)
+		goto err;
+
+	if ((ret = __blob_id_to_path(env, blob_sub_dir, blob_id, &dir)) != 0)
+		goto err;
+
+	if ((ret = __db_appname(env, DB_APP_BLOB, dir, NULL, &path)) != 0)
+		goto err;
+
+	if ((ret = __os_open(env, path, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
+		goto err;
+
+	if ((ret = __os_seek(env, fhp, 0, 0, offset)) != 0)
+		goto err;
+
+	if ((ret = __os_read(env, fhp, dbt->data, size, &bytes)) != 0)
+		goto err;
+	/* Report what was actually read, even when it is short. */
+	dbt->size = (u_int32_t)bytes;
+	if (bytes != size)
+		ret = EIO;
+
+err:	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	if (dir != NULL)
+		__os_free(env, dir);
+	if (path != NULL)
+		__os_free(env, path);
+	if (blob_sub_dir != NULL)
+		__os_free(env, blob_sub_dir);
+	return (ret);
+}
+
+/*
+ * __blob_vrfy --
+ * Checks that a blob file for the given blob id exists, and is the given size.
+ * Returns 0 if so and DB_VERIFY_BAD if a check on the file fails.
+ *
+ * PUBLIC: int __blob_vrfy __P((ENV *, db_seq_t, off_t,
+ * PUBLIC:	db_seq_t, db_seq_t, db_pgno_t, u_int32_t));
+ */
+int
+__blob_vrfy(env, blob_id, blob_size, file_id, sdb_id, pgno, flags)
+	ENV *env;
+	db_seq_t blob_id;
+	off_t blob_size;
+	db_seq_t file_id;
+	db_seq_t sdb_id;
+	db_pgno_t pgno;
+	u_int32_t flags;
+{
+	DB_FH *fhp;
+	char *blob_sub_dir, *dir, *path;
+	int isdir, ret;
+	off_t actual_size;
+	u_int32_t mbytes, bytes;
+
+	blob_sub_dir = dir = path = NULL;
+	fhp = NULL;
+	isdir = 0;
+
+	if ((ret = __blob_make_sub_dir(
+	    env, &blob_sub_dir, file_id, sdb_id)) != 0)
+		goto err;
+	/* The call above zeroed ret; failures below must not return 0. */
+	ret = DB_VERIFY_BAD;
+	if (__blob_id_to_path(env, blob_sub_dir, blob_id, &dir) != 0) {
+		EPRINT((env, DB_STR_A("0222",
+		    "Page %lu: Error getting path to blob file for %llu",
+		    "%lu %llu"), (u_long)pgno, (unsigned long long)blob_id));
+		goto err;
+	}
+	if (__db_appname(env, DB_APP_BLOB, dir, NULL, &path) != 0) {
+		EPRINT((env, DB_STR_A("0223",
+		    "Page %lu: Error getting path to blob file for %llu",
+		    "%lu %llu"), (u_long)pgno, (unsigned long long)blob_id));
+		goto err;
+	}
+	if ((__os_exists(env, path, &isdir)) != 0 || isdir != 0) {
+		EPRINT((env, DB_STR_A("0224",
+		    "Page %lu: blob file does not exist at %s",
+		    "%lu %s"), (u_long)pgno, path));
+		goto err;
+	}
+	if (__os_open(env, path, 0, DB_OSO_RDONLY, 0, &fhp) != 0) {
+		EPRINT((env, DB_STR_A("0225",
+		    "Page %lu: Error opening blob file at %s",
+		    "%lu %s"), (u_long)pgno, path));
+		goto err;
+	}
+	if (__os_ioinfo(env, path, fhp, &mbytes, &bytes, NULL) != 0) {
+		EPRINT((env, DB_STR_A("0226",
+		    "Page %lu: Error getting blob file size at %s",
+		    "%lu %s"), (u_long)pgno, path));
+		goto err;
+	}
+
+	actual_size = ((off_t)mbytes * (off_t)MEGABYTE) + bytes;
+	if (blob_size != actual_size) {
+		EPRINT((env, DB_STR_A("0227",
+"Page %lu: blob file size does not match size in database record: %llu %llu",
+		    "%lu %llu %llu"), (u_long)pgno,
+		    (unsigned long long)actual_size,
+		    (unsigned long long)blob_size));
+		goto err;
+	}
+
+	ret = 0;
+
+err:	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	if (dir != NULL)
+		__os_free(env, dir);
+	if (path != NULL)
+		__os_free(env, path);
+	if (blob_sub_dir != NULL)
+		__os_free(env, blob_sub_dir);
+	return (ret);
+}
+
+/*
+ * __blob_del_hierarchy --
+ *
+ * Empties the environment's blob directory.  Used by replication.
+ *
+ * PUBLIC: int __blob_del_hierarchy __P((ENV *));
+ */
+int
+__blob_del_hierarchy(env)
+	ENV *env;
+{
+	char *root;
+	int ret;
+
+	root = NULL;
+
+	/* Resolve the blob root, then clean it outside any transaction. */
+	ret = __db_appname(env, DB_APP_BLOB, NULL, NULL, &root);
+	if (ret == 0)
+		ret = __blob_clean_dir(env, NULL, root, NULL, 0);
+
+	/* Release the name whether or not the cleanup succeeded. */
+	if (root != NULL)
+		__os_free(env, root);
+	return (ret);
+}
+
+/*
+ * __blob_del_all --
+ *
+ * Deletes all the blob files and meta databases in a database's blob
+ * directory; a truncate keeps the meta database.  Directories are not deleted
+ * when the delete is transactionally protected, since there is no current way
+ * to undo a directory delete in case the operation is aborted.
+ *
+ * PUBLIC: int __blob_del_all __P((DB *, DB_TXN *, int));
+ */
+int
+__blob_del_all(dbp, txn, istruncate)
+	DB *dbp;
+	DB_TXN *txn;
+	int istruncate;
+{
+#ifdef HAVE_64BIT_TYPES
+	ENV *env;
+	char *path;
+	int isdir, ret;
+
+	env = dbp->env;
+	path = NULL;
+	ret = 0;
+
+	if (dbp->blob_sub_dir == NULL) {
+		if ((ret = __blob_make_sub_dir(env, &dbp->blob_sub_dir,
+		    dbp->blob_file_id, dbp->blob_sdb_id)) != 0)
+			goto err;
+	}
+
+	/* Do nothing if blobs are not enabled. */
+	if (dbp->blob_sub_dir == NULL ||
+	    (dbp->blob_file_id == 0 && dbp->blob_sdb_id == 0))
+		goto err;
+
+	if ((ret = __blob_get_dir(dbp, &path)) != 0)
+		goto err;
+
+	/* Close the blob meta data databases, they are about to be deleted. */
+	if (!istruncate) {
+		if (dbp->blob_seq != NULL) {
+			if ((ret = __seq_close(dbp->blob_seq, 0)) != 0)
+			    goto err;
+			dbp->blob_seq = NULL;
+		}
+		if (dbp->blob_meta_db != NULL) {
+			if ((ret =
+			    __db_close(dbp->blob_meta_db, NULL, 0)) != 0)
+			    goto err;
+			dbp->blob_meta_db = NULL;
+		}
+	}
+
+	/*
+	 * The blob directory may not exist if blobs were enabled,
+	 * but none were created; that is success, there is nothing to delete.
+	 */
+	if (__os_exists(env, path, &isdir) != 0)
+		goto err;
+
+	if ((ret = __blob_clean_dir(
+	    env, txn, path, dbp->blob_sub_dir, istruncate)) != 0)
+		goto err;
+
+	if (!IS_REAL_TXN(txn) && !istruncate) {
+		if ((ret = __os_rmdir(env, path)) != 0)
+			goto err;
+	}
+
+err:	if (path != NULL)
+		__os_free(env, path);
+	return (ret);
+
+#else /*HAVE_64BIT_TYPES*/
+	__db_errx(dbp->env, DB_STR("0220",
+	    "library build did not include support for blobs"));
+	return (DB_OPNOTSUP);
+#endif
+
+}
+
+/*
+ * __blob_clean_dir --
+ *
+ * Delete all files in the given directory and in all of its sub-directories;
+ * sub-directories themselves are removed only when there is no transaction.
+ * NOTE(review): entry paths are built with sprintf into DB_MAXPATHLEN bytes.
+ */
+static int
+__blob_clean_dir(env, txn, dir, subdir, istruncate)
+	ENV *env;
+	DB_TXN *txn;
+	const char *dir;
+	const char *subdir;
+	int istruncate;
+{
+	DB *meta;
+	DB_THREAD_INFO *ip;
+	char *blob_dir, **dirs, *fname, full_path[DB_MAXPATHLEN], *local_path;
+	int count, i, isdir, ret, t_ret;
+
+	count = 0;
+	dirs = NULL;
+	fname = NULL;
+	meta = NULL;
+
+	/* Get a list of all files in the directory; a missing one is fine. */
+	if ((ret = __os_dirlist(env, dir, 1, &dirs, &count)) != 0) {
+		if (ret == ENOENT)
+			ret = 0;
+		goto err;
+	}
+
+	for (i = 0; i < count; i++) {
+		(void)sprintf(full_path, "%s%c%s%c",
+		    dir, PATH_SEPARATOR[0], dirs[i], '\0');
+
+		if (__os_exists(env, full_path, &isdir) != 0)
+			continue;
+
+		/* If it is a directory, clean it.  Else remove the file. */
+		if (isdir) {
+			if ((ret = __blob_clean_dir(
+			    env, txn, full_path, subdir, istruncate)) != 0)
+				goto err;
+			/* Delete the now-cleaned sub-directory itself. */
+			if (!IS_REAL_TXN(txn)) {
+				if ((ret = __os_rmdir(env, full_path)) != 0)
+					goto err;
+			}
+		} else if (strcmp(dirs[i], BLOB_META_FILE_NAME) == 0 ) {
+			/* Ignore the meta db when truncating. */
+			if (istruncate)
+				continue;
+			blob_dir = (env->dbenv->db_blob_dir != NULL ?
+			    env->dbenv->db_blob_dir : BLOB_DEFAULT_DIR);
+			if ((fname = strstr(full_path, blob_dir)) == NULL)
+				goto err;
+			fname += strlen(blob_dir) + 1;
+			if ((ret = __db_create_internal(&meta, env, 0)) != 0)
+				goto err;
+			ENV_GET_THREAD_INFO(env, ip);
+			if ((ret = __db_remove_int(meta,
+			    ip, txn, fname, NULL, 0)) != 0)
+				goto err;
+			/*
+			 * Closing the local DB handle releases the transaction
+			 * locks, but those have to remain until the
+			 * transaction is resolved, so NULL the DB locker.
+			 * See __env_dbremove_pp for more details.
+			 */
+			if (IS_REAL_TXN(txn))
+				meta->locker = NULL;
+			if ((t_ret = __db_close(
+			    meta, NULL, DB_NOSYNC)) != 0 && ret == 0)
+				ret = t_ret;
+			meta = NULL;
+			if (ret != 0)
+				goto err;
+		} else {
+			if (!IS_REAL_TXN(txn))
+				ret = __os_unlink(env, full_path, 0);
+			else {
+				local_path = (subdir == NULL ? full_path :
+				    strstr(full_path, subdir));
+				if (local_path != NULL)
+					ret = __fop_remove(env, txn, NULL,
+					    local_path, NULL, DB_APP_BLOB, 0);
+			}
+			if (ret != 0)
+				goto err;
+		}
+	}
+err:	if (meta != NULL) {
+		if ((t_ret = __db_close(
+		    meta, NULL, DB_NOSYNC)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+	if (dirs != NULL)
+		__os_dirfree(env, dirs, count);
+
+	return (ret);
+}
+
+/*
+ * __blob_copy_all --
+ *	Copy all files in the database's blob directory to a backup target.
+ *
+ * PUBLIC: int __blob_copy_all __P((DB*, const char *, u_int32_t));
+ */
+int __blob_copy_all(dbp, target, flags)
+	DB *dbp;
+	const char *target;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	char *blobdir, *fullname, *metafname, new_target[DB_MAXPATHLEN];
+	const char *path;
+	int ret;
+
+	env = dbp->env;
+	blobdir = NULL;
+	fullname = NULL;
+	metafname = NULL;
+	ret = 0;
+
+	/* Do nothing if blobs are not enabled. */
+	if (dbp->blob_sub_dir == NULL || dbp->blob_threshold == 0)
+		return (0);
+
+	/* Create the directory structure in the target directory. */
+	if (env->dbenv->db_blob_dir != NULL)
+		path = env->dbenv->db_blob_dir;
+	else
+		path = BLOB_DEFAULT_DIR;
+
+	/*
+	 * With DB_BACKUP_SINGLE_DIR the default blob directory name is used
+	 * under the target; otherwise the configured blob directory is kept.
+	 */
+	(void)snprintf(new_target, sizeof(new_target), "%s%c%s%c%c",
+	    target, PATH_SEPARATOR[0], LF_ISSET(DB_BACKUP_SINGLE_DIR) ?
+	    BLOB_DEFAULT_DIR : path, PATH_SEPARATOR[0], '\0');
+	path = new_target;
+#ifdef DB_WIN32
+	/*
+	 * Absolute paths on windows can result in it creating a "C" or "D"
+	 * directory in the working directory.
+	 */
+	if (__os_abspath(path))
+		path += 2;
+#endif
+	if ((ret = __db_mkpath(env, path)) != 0)
+		goto err;
+
+	/* Copy the directory id database. */
+	if ((ret = __blob_make_meta_fname(env, NULL, &metafname)) != 0)
+		goto err;
+	if ((ret = __db_appname(env,
+	    DB_APP_BLOB, metafname, NULL, &fullname)) != 0)
+		goto err;
+	path = fullname;
+	/* Remove env home from the full path of directory id database. */
+	if (!__os_abspath(fullname) &&
+	    env->db_home != NULL && (env->db_home)[0] != '\0')
+		path += (strlen(env->db_home) + 1);
+	ENV_GET_THREAD_INFO(env, ip);
+
+	if ((ret = __db_dbbackup(
+	    dbp->dbenv, ip, path, new_target, 0, 0, metafname)) != 0)
+		goto err;
+
+	if ((ret = __blob_get_dir(dbp, &blobdir)) != 0)
+		goto err;
+
+	/*
+	 * The blob directory may not exist if blobs were enabled,
+	 * but none were created; then there is nothing more to copy.
+	 */
+	if (__os_exists(env, blobdir, NULL) != 0)
+		goto err;
+
+	(void)sprintf(new_target + strlen(new_target),
+	     "%s%c", dbp->blob_sub_dir, '\0');
+	if ((ret = __blob_copy_dir(dbp, blobdir, new_target)) != 0)
+		goto err;
+
+err:	if (blobdir != NULL)
+		__os_free(env, blobdir);
+	if (metafname != NULL)
+		__os_free(env, metafname);
+	if (fullname != NULL)
+		__os_free(env, fullname);
+	return (ret);
+}
+
+/*
+ * __blob_copy_dir --
+ *	Copy all files in the given directory, and all files
+ *	in all sub-directories, into the target directory.
+ */
+static int
+__blob_copy_dir(dbp, dir, target)
+	DB *dbp;
+	const char *dir;
+	const char *target;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	char **dirs, full_path[DB_MAXPATHLEN], new_target[DB_MAXPATHLEN];
+	int count, i, isdir, ret;
+
+	env = dbp->env;
+	count = 0;
+	dirs = NULL;
+
+	/* Create the directory structure in the target directory. */
+	if ((ret = __db_mkpath(env, target)) != 0)
+		goto err;
+
+	ENV_GET_THREAD_INFO(env, ip);
+	/* Get a list of all files in the directory. */
+	if ((ret = __os_dirlist(env, dir, 1, &dirs, &count)) != 0)
+		goto err;
+
+	for (i = 0; i < count; i++) {
+		(void)sprintf(full_path, "%s%c%s%c",
+		    dir, PATH_SEPARATOR[0], dirs[i], '\0');
+
+		if (__os_exists(env, full_path, &isdir) != 0)
+			continue;
+
+		/*
+		 * If it is a directory, copy the files in it.
+		 * Else if it is the meta database, call __db_dbbackup, else
+		 * copy the file.
+		 */
+		if (isdir) {
+			(void)sprintf(new_target,
+			    "%s%c%s%c%c", target, PATH_SEPARATOR[0],
+			    dirs[i], PATH_SEPARATOR[0], '\0');
+			if ((ret = __blob_copy_dir(
+			    dbp, full_path, new_target)) != 0)
+				goto err;
+		} else {
+			if (strcmp(dirs[i], BLOB_META_FILE_NAME) == 0) {
+				(void)sprintf(full_path, "%s%c%s%c",
+				    dbp->blob_sub_dir,
+				    PATH_SEPARATOR[0], dirs[i], '\0');
+				if ((ret = __db_dbbackup(dbp->dbenv, ip,
+				    full_path, target, 0, 0,
+				    BLOB_META_FILE_NAME)) != 0)
+					goto err;
+			} else {
+				if ((ret = backup_data_copy(
+				    dbp->dbenv, dirs[i], dir, target, 0)) != 0)
+					goto err;
+			}
+		}
+	}
+
+err:	
+	if (dirs != NULL)
+		__os_dirfree(env, dirs, count);
+	return (ret);
+}
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index b455ff23..be4c6b01 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -22,13 +22,16 @@ static int __bam_csearch __P((DBC *, DBT *, u_int32_t, int));
 static int __bam_lock_tree __P((DBC *, EPG *, EPG *csp, u_int32_t, u_int32_t));
 static int __bam_lock_subtree __P((DBC *, PAGE *, u_int32_t, u_int32_t));
 static int __bam_merge __P((DBC *,
-     DBC *,  u_int32_t, DBT *, DB_COMPACT *,int *));
-static int __bam_merge_internal __P((DBC *, DBC *, int, DB_COMPACT *, int *));
+     DBC *,  u_int32_t, DBT *, DB_COMPACT *, int *, int *));
+static int __bam_merge_internal __P((DBC *,
+    DBC *, int, DB_COMPACT *, int *, int *));
 static int __bam_merge_pages __P((DBC *, DBC *, DB_COMPACT *));
-static int __bam_merge_records __P((DBC *, DBC*,  u_int32_t, DB_COMPACT *));
-static int __bam_truncate_internal_overflow __P((DBC *, PAGE *, DB_COMPACT *));
+static int __bam_merge_records __P((DBC *,
+    DBC *,  u_int32_t, DB_COMPACT *, int *));
+static int __bam_truncate_internal_overflow __P((DBC *,
+    PAGE *, DB_COMPACT *, int *));
 static int __bam_truncate_root_page __P((DBC *,
-     PAGE *, u_int32_t, DB_COMPACT *));
+    PAGE *, u_int32_t, DB_COMPACT *, int *));
 
 #ifdef HAVE_FTRUNCATE
 static int __bam_savekey __P((DBC *, int, DBT *));
@@ -145,13 +148,13 @@ __bam_csearch(dbc, start, sflag, level)
  * PUBLIC:      DBT *, DBT *, u_int32_t, int *, DB_COMPACT *, int *));
  */
 int
-__bam_compact_int(dbc, start, stop, factor, spanp, c_data, donep)
+__bam_compact_int(dbc, start, stop, factor, spanp, c_data, isdonep)
 	DBC *dbc;
 	DBT *start, *stop;
 	u_int32_t factor;
 	int *spanp;
 	DB_COMPACT *c_data;
-	int *donep;
+	int *isdonep;
 {
 	BTREE_CURSOR *cp, *ncp;
 	DB *dbp;
@@ -168,7 +171,7 @@ __bam_compact_int(dbc, start, stop, factor, spanp, c_data, donep)
 	int check_dups, check_trunc, clear_root, do_commit, isdone;
 	int merged, next_p, pgs_done, ret, t_ret, tdone;
 
-#ifdef	DEBUG
+#ifdef	DEBUG_WOP
 #define	CTRACE(dbc, location, t, start, f) do {				\
 		DBT __trace;						\
 		DB_SET_DBT(__trace, t, strlen(t));			\
@@ -182,8 +185,8 @@ __bam_compact_int(dbc, start, stop, factor, spanp, c_data, donep)
 		CTRACE(dbc, location, __buf, start, f);			\
 	} while (0)
 #else
-#define	CTRACE(dbc, location, t, start, f)
-#define	PTRACE(dbc, location, p, start, f)
+#define	CTRACE(dbc, location, t, start, f)	NOP_STATEMENT
+#define	PTRACE(dbc, location, p, start, f)	NOP_STATEMENT
 #endif
 
 	ndbc = NULL;
@@ -551,11 +554,10 @@ retry:	pg = NULL;
 				if (ret != 0)
 					goto err1;
 			}
-			pgs_done++;
-			/* Get a fresh low numbered page. */
+			/* Try to swap to a lower numbered page. */
 			if ((ret = __db_exchange_page(dbc,
 			    &cp->csp->page, ncp->csp->page,
-			    PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+			    PGNO_INVALID, DB_EXCH_DEFAULT, &pgs_done)) != 0)
 				goto err1;
 			if ((ret = __TLPUT(dbc, prev_lock)) != 0)
 				goto err1;
@@ -598,8 +600,8 @@ retry:	pg = NULL;
 		merged = 0;
 		for (epg = cp->sp; epg != cp->csp; epg++) {
 			PTRACE(dbc, "PMerge", PGNO(epg->page), start, 0);
-			if ((ret = __bam_merge_internal(dbc,
-			    ndbc, LEVEL(epg->page), c_data, &merged)) != 0)
+			if ((ret = __bam_merge_internal(dbc, ndbc,
+			    LEVEL(epg->page), c_data, &merged, &pgs_done)) != 0)
 				break;
 			if (merged)
 				break;
@@ -627,7 +629,7 @@ retry:	pg = NULL;
 		}
 		PTRACE(dbc, "SMerge", PGNO(cp->csp->page), start, 0);
 
-		/* if we remove the next page, then we need its next locked */
+		/* If we remove the next page, then we need its next locked. */
 		npgno = NEXT_PGNO(ncp->csp->page);
 		if (npgno != PGNO_INVALID) {
 			TRY_LOCK2(dbc, ndbc, npgno,
@@ -637,9 +639,8 @@ retry:	pg = NULL;
 		}
 		/*lint -e{794} */
 		if ((ret = __bam_merge(dbc,
-		     ndbc, factor, stop, c_data, &isdone)) != 0)
+		     ndbc, factor, stop, c_data, &isdone, &pgs_done)) != 0)
 			goto err1;
-		pgs_done++;
 		/*
 		 * __bam_merge could have freed our stack if it
 		 * deleted a page possibly collapsing the tree.
@@ -722,8 +723,8 @@ retry:	pg = NULL;
 				/* Get a fresh low numbered page. */
 				pgno = PGNO(pg);
 				if ((ret = __db_exchange_page(dbc,
-				    &cp->csp->page, NULL,
-				    PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+				    &cp->csp->page, NULL, PGNO_INVALID,
+				    DB_EXCH_DEFAULT, &pgs_done)) != 0)
 					goto err1;
 				if ((ret = __TLPUT(dbc, prev_lock)) != 0)
 					goto err1;
@@ -734,10 +735,7 @@ retry:	pg = NULL;
 				LOCK_INIT(next_lock);
 				saved_pgno = PGNO_INVALID;
 				pg = cp->csp->page;
-				if (pgno != PGNO(pg)) {
-					pgs_done++;
-					pgno = PGNO(pg);
-				}
+				pgno = PGNO(pg);
 			}
 			/*
 			 * If we are going to leave this parent commit
@@ -752,7 +750,7 @@ retry:	pg = NULL;
 			goto next_page;
 		}
 
-		/* If they have the same parent, just dup the cursor */
+		/* If they have the same parent, just dup the cursor. */
 		if (ndbc != NULL && (ret = __dbc_close(ndbc)) != 0)
 			goto err1;
 		if ((ret = __dbc_dup(dbc, &ndbc, DB_POSITION)) != 0)
@@ -842,17 +840,15 @@ retry:	pg = NULL;
 			pgno = PGNO(pg);
 			/* Get a fresh low numbered page. */
 			if ((ret = __db_exchange_page(dbc, &cp->csp->page,
-			    npg, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+			    npg, PGNO_INVALID,
+			    DB_EXCH_DEFAULT, &pgs_done)) != 0)
 				goto err1;
 			if ((ret = __TLPUT(dbc, prev_lock)) != 0)
 				goto err1;
 			LOCK_INIT(prev_lock);
 			prev_pgno = PGNO_INVALID;
 			pg = cp->csp->page;
-			if (pgno != PGNO(pg)) {
-				pgs_done++;
-				pgno = PGNO(pg);
-			}
+			pgno = PGNO(pg);
 		}
 		c_data->compact_pages_examine++;
 
@@ -887,11 +883,9 @@ retry:	pg = NULL;
 		 */
 		PTRACE(dbc, "Merge", PGNO(cp->csp->page), start, 0);
 		if ((ret = __bam_merge(dbc,
-		     ndbc, factor, stop, c_data, &isdone)) != 0)
+		     ndbc, factor, stop, c_data, &isdone, &pgs_done)) != 0)
 			goto err1;
 
-		pgs_done++;
-
 		if ((ret = __TLPUT(dbc, nnext_lock)) != 0)
 			goto err1;
 		LOCK_INIT(nnext_lock);
@@ -932,7 +926,7 @@ next_page:
 	pg = NULL;
 	if ((ret = __bam_stkrel(dbc, STK_PGONLY)) != 0)
 		goto err;
-	if (npgno != PGNO_INVALID &&
+	if (npgno != PGNO_INVALID && !do_commit &&
 	    (ret = __db_lget(dbc, 0, npgno, DB_LOCK_READ, 0, &next_lock)) != 0)
 		goto err;
 	if ((ret = __bam_stkrel(dbc, pgs_done == 0 ? STK_NOLOCK : 0)) != 0)
@@ -1010,9 +1004,6 @@ err:	/*
 	if ((t_ret = __bam_stkrel(dbc, sflag)) != 0 && ret == 0)
 		ret = t_ret;
 
-	if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
-		ret = t_ret;
-
 	if (pg != NULL && (t_ret =
 	     __memp_fput(dbmp,
 		  dbc->thread_info, pg, dbc->priority) != 0) && ret == 0)
@@ -1022,7 +1013,11 @@ err:	/*
 		  dbc->thread_info, npg, dbc->priority) != 0) && ret == 0)
 		ret = t_ret;
 
-out:	*donep = isdone;
+out:
+	if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
+		ret = t_ret;
+
+	*isdonep = isdone;
 
 	/* For OPD trees return if we did anything in the span variable. */
 	if (F_ISSET(dbc, DBC_OPD))
@@ -1035,12 +1030,13 @@ out:	*donep = isdone;
  * __bam_merge -- do actual merging of leaf pages.
  */
 static int
-__bam_merge(dbc, ndbc, factor, stop, c_data, donep)
+__bam_merge(dbc, ndbc, factor, stop, c_data, isdonep, pgs_donep)
 	DBC *dbc, *ndbc;
 	u_int32_t factor;
 	DBT *stop;
 	DB_COMPACT *c_data;
-	int *donep;
+	int *isdonep;
+	int *pgs_donep;
 {
 	BTREE_CURSOR *cp, *ncp;
 	DB *dbp;
@@ -1064,9 +1060,9 @@ __bam_merge(dbc, ndbc, factor, stop, c_data, donep)
 
 	/* Find if the stopping point is on this page. */
 	if (stop != NULL && stop->size != 0) {
-		if ((ret = __bam_compact_isdone(dbc, stop, npg, donep)) != 0)
+		if ((ret = __bam_compact_isdone(dbc, stop, npg, isdonep)) != 0)
 			return (ret);
-		if (*donep)
+		if (*isdonep)
 			return (0);
 	}
 
@@ -1080,20 +1076,23 @@ __bam_merge(dbc, ndbc, factor, stop, c_data, donep)
 	    ncp->csp[-1].indx == 0 && ncp->csp[-1].entries != 1) ||
 	    (int)(P_FREESPACE(dbp, pg) -
 	    ((dbp->pgsize - P_OVERHEAD(dbp)) -
-	    P_FREESPACE(dbp, npg))) < (int)factor)
-		ret = __bam_merge_records(dbc, ndbc, factor, c_data);
-	else
+	    P_FREESPACE(dbp, npg))) < (int)factor) {
+		ret = __bam_merge_records(dbc, ndbc, factor, c_data, pgs_donep);
+	} else {
 		/*lint -e{794} */
 free_page:	ret = __bam_merge_pages(dbc, ndbc, c_data);
+		(*pgs_donep)++;
+	}
 
 	return (ret);
 }
 
 static int
-__bam_merge_records(dbc, ndbc, factor, c_data)
+__bam_merge_records(dbc, ndbc, factor, c_data, pgs_donep)
 	DBC *dbc, *ndbc;
 	u_int32_t factor;
 	DB_COMPACT *c_data;
+	int *pgs_donep;
 {
 	BINTERNAL *bi;
 	BKEYDATA *bk, *tmp_bk;
@@ -1126,8 +1125,8 @@ __bam_merge_records(dbc, ndbc, factor, c_data)
 	if (c_data->compact_truncate != PGNO_INVALID &&
 	     PGNO(ncp->csp->page) > c_data->compact_truncate) {
 		/* Get a fresh low numbered page. */
-		if ((ret = __db_exchange_page(ndbc,
-		   &ncp->csp->page, pg, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+		if ((ret = __db_exchange_page(ndbc, &ncp->csp->page,
+		    pg, PGNO_INVALID, DB_EXCH_DEFAULT, pgs_donep)) != 0)
 			goto err;
 	}
 
@@ -1197,6 +1196,7 @@ __bam_merge_records(dbc, ndbc, factor, c_data)
 	/* If we have hit the first record then there is nothing we can move. */
 	if (indx == 0)
 		goto done;
+	(*pgs_donep)++;
 	if (TYPE(pg) != P_LBTREE && TYPE(pg) != P_LDUP) {
 		if (indx == nent)
 			return (__bam_merge_pages(dbc, ndbc, c_data));
@@ -1237,7 +1237,8 @@ __bam_merge_records(dbc, ndbc, factor, c_data)
 		indx -= adj;
 	}
 	bk = GET_BKEYDATA(dbp, npg, indx);
-	len = (B_TYPE(bk->type) != B_KEYDATA) ? BOVERFLOW_SIZE : bk->len;
+	len = (B_TYPE(bk->type) == B_KEYDATA) ? bk->len :
+	    ((B_TYPE(bk->type) == B_BLOB) ? BBLOB_DSIZE : BOVERFLOW_SIZE);
 	if (indx != 0 && BINTERNAL_SIZE(len) >= pfree) {
 		if (F_ISSET(dbc, DBC_OPD)) {
 			if (dbp->dup_compare == __bam_defcmp)
@@ -1281,8 +1282,9 @@ noprefix:
 		} while (indx != 0 &&  ninp[indx] == ninp[indx - adj]);
 
 		bk = GET_BKEYDATA(dbp, npg, indx);
-		len =
-		    (B_TYPE(bk->type) != B_KEYDATA) ? BOVERFLOW_SIZE : bk->len;
+		len = (B_TYPE(bk->type) == B_KEYDATA) ?
+		    bk->len : ((B_TYPE(bk->type) == B_BLOB) ?
+		    BBLOB_DSIZE : BOVERFLOW_SIZE);
 	}
 
 	/*
@@ -1346,6 +1348,13 @@ no_check: is_dup = first_dup = next_dup = 0;
 			     BOVERFLOW_SIZE, &data, NULL)) != 0)
 				goto err;
 			break;
+		case B_BLOB:
+			data.size = BBLOB_SIZE;
+			data.data = bk;
+			if ((ret = __db_pitem(dbc, pg,
+			    pind, BBLOB_SIZE, &data, NULL)) != 0)
+				goto err;
+			break;
 		default:
 			__db_errx(env, DB_STR_A("1022",
 			    "Unknown record format, page %lu, indx 0",
@@ -1538,15 +1547,20 @@ err:	return (ret);
 /*
  * __bam_merge_internal --
  *	Merge internal nodes of the tree.
+ *
+ *	The first entry of an internal page is not guaranteed to hold a
+ *	useful key.
  */
 static int
-__bam_merge_internal(dbc, ndbc, level, c_data, merged)
+__bam_merge_internal(dbc, ndbc, level, c_data, merged, pgs_donep)
 	DBC *dbc, *ndbc;
 	int level;
 	DB_COMPACT *c_data;
 	int *merged;
+	int *pgs_donep;
 {
 	BINTERNAL bi, *bip, *fip;
+	BOVERFLOW bo;
 	BTREE_CURSOR *cp, *ncp;
 	DB *dbp;
 	DBT data, hdr;
@@ -1579,7 +1593,6 @@ __bam_merge_internal(dbc, ndbc, level, c_data, merged)
 	dbmp = dbp->mpf;
 	cp = (BTREE_CURSOR *)dbc->internal;
 	ncp = (BTREE_CURSOR *)ndbc->internal;
-	*merged = 0;
 	ret = 0;
 
 	/*
@@ -1608,11 +1621,11 @@ __bam_merge_internal(dbc, ndbc, level, c_data, merged)
 		 * Check for overflow keys on both pages while we have
 		 * them locked.
 		 */
-		 if ((ret =
-		      __bam_truncate_internal_overflow(dbc, pg, c_data)) != 0)
+		if ((ret = __bam_truncate_internal_overflow(dbc,
+		    pg, c_data, pgs_donep)) != 0)
 			goto err;
-		 if ((ret =
-		      __bam_truncate_internal_overflow(dbc, npg, c_data)) != 0)
+		if ((ret = __bam_truncate_internal_overflow(dbc,
+		    npg, c_data, pgs_donep)) != 0)
 			goto err;
 	}
 
@@ -1624,7 +1637,12 @@ __bam_merge_internal(dbc, ndbc, level, c_data, merged)
 	 */
 	fip = NULL;
 	if (TYPE(pg) == P_IBTREE) {
-		/* See where we run out of space. */
+		/* See where we run out of space. This does not yet include
+		 * whatever extra pages are needed if an overflow key is
+		 * going to be added to one or more parent pages. It would be
+		 * better to use as little of the key as necessary, though
+		 * the effort of determining that might not be worthwhile.
+		 */
 		freespace = P_FREESPACE(dbp, pg);
 		/*
 		 * The leftmost key of an internal page is not accurate.
@@ -1704,12 +1722,37 @@ fits:	memset(&bi, 0, sizeof(bi));
 			if (fip == NULL) {
 				data.size = bip->len;
 				data.data = bip->data;
+			} else if (fip->type == B_OVERFLOW) {
+				DB_ASSERT(dbc->env,
+				    fip->len == sizeof(BOVERFLOW));
+				/* Cast to "BOVERFLOW *" to calm down lint. */
+				memmove(&bo,
+				    (BOVERFLOW *)fip->data, sizeof(BOVERFLOW));
+				memset(&hdr, 0, sizeof(hdr));
+				if ((ret = __db_goff(dbc, &hdr, bo.tlen,
+				     bo.pgno, &hdr.data, &hdr.size)) == 0)
+					ret = __db_poff(dbc, &hdr, &bo.pgno);
+				if (hdr.data != NULL)
+					__os_free(dbp->env, hdr.data);
+				if (ret != 0)
+					return (ret);
+				data.size = sizeof(bo);
+				data.data = &bo;
+			} else if (fip->type == B_BLOB) {
+				/* Blobs should never appear as keys. */
+				DB_ASSERT(dbc->env,
+				    !(fip->type == B_BLOB &&
+				    TYPE(pg) == P_IBTREE));
 			} else {
 				data.size = fip->len;
 				data.data = fip->data;
 			}
 			bi.len = data.size;
-			B_TSET(bi.type, bip->type);
+			/*
+			 * Set bi.type according to the data's type, to ensure
+			 * that it is B_OVERFLOW iff the data is a BOVERFLOW.
+			 */
+			B_TSET(bi.type, fip == NULL ? bip->type : fip->type);
 			bi.pgno = bip->pgno;
 			bi.nrecs = bip->nrecs;
 			hdr.data = &bi;
@@ -1750,7 +1793,12 @@ fits:	memset(&bi, 0, sizeof(bi));
 		if ((ret = __db_pitem(dbc, pg, pind, size, &hdr, &data)) != 0)
 			goto err;
 		pind++;
-		if (fip != NULL) {
+		/* Also test bip so that Fortify does not complain. */
+		if (fip != NULL && bip != NULL) {
+			if (B_TYPE(bip->type) == B_OVERFLOW &&
+			    (ret = __db_doff(dbc,
+			    ((BOVERFLOW *)bip->data)->pgno)) != 0)
+				goto err;
 			/* reset size to be for the record being deleted. */
 			size = BINTERNAL_SIZE(bip->len);
 			fip = NULL;
@@ -1848,14 +1896,14 @@ fits:	memset(&bi, 0, sizeof(bi));
 		    PGNO(npg) > c_data->compact_truncate &&
 		    ncp->csp != ncp->sp) {
 			if ((ret = __db_exchange_page(ndbc, &ncp->csp->page,
-			    pg, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+			    pg, PGNO_INVALID, DB_EXCH_DEFAULT, pgs_donep)) != 0)
 				goto err;
 		}
 		if (c_data->compact_truncate != PGNO_INVALID &&
 		     PGNO(pg) > c_data->compact_truncate && cp->csp != cp->sp) {
 			if ((ret = __db_exchange_page(dbc, &cp->csp->page,
 			    ncp->csp->page,
-			    PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+			    PGNO_INVALID, DB_EXCH_DEFAULT, pgs_donep)) != 0)
 				goto err;
 		}
 	}
@@ -1875,13 +1923,13 @@ err:	cp->csp = save_csp;
  * We may or may not have a write lock on this page.
  */
 static int
-__bam_compact_dups(dbc, ppg, factor, have_lock, c_data, donep)
+__bam_compact_dups(dbc, ppg, factor, have_lock, c_data, pgs_donep)
 	DBC *dbc;
 	PAGE **ppg;
 	u_int32_t factor;
 	int have_lock;
 	DB_COMPACT *c_data;
-	int *donep;
+	int *pgs_donep;
 {
 	BOVERFLOW *bo;
 	BTREE_CURSOR *cp;
@@ -1896,15 +1944,19 @@ __bam_compact_dups(dbc, ppg, factor, have_lock, c_data, donep)
 	DB_ASSERT(NULL, dbc != NULL);
 	dbp = dbc->dbp;
 	dbmp = dbp->mpf;
+	/* XXX Don't reserve any free bytes (Force 100% fillfactor) in OPD trees
+	 * to ensure forward progress.
+	 */
+	factor = 0;
 	cp = (BTREE_CURSOR *)dbc->internal;
 
 	for (i = 0; i <  NUM_ENT(*ppg); i++) {
 		bo = GET_BOVERFLOW(dbp, *ppg, i);
-		if (B_TYPE(bo->type) == B_KEYDATA)
+		if (B_TYPE(bo->type) == B_KEYDATA ||
+		    B_TYPE(bo->type) == B_BLOB)
 			continue;
 		c_data->compact_pages_examine++;
 		if (bo->pgno > c_data->compact_truncate) {
-			(*donep)++;
 			if (!have_lock) {
 				/*
 				 * The caller should have the page at
@@ -1925,8 +1977,9 @@ __bam_compact_dups(dbc, ppg, factor, have_lock, c_data, donep)
 				    dbc->txn, DB_MPOOL_DIRTY, ppg)) != 0)
 					goto err;
 			}
+			pgno = bo->pgno;
 			if ((ret = __bam_truncate_root_page(dbc,
-			     *ppg, i, c_data)) != 0)
+			     *ppg, i, c_data, pgs_donep)) != 0)
 				goto err;
 			/* Just in case it should move.  Could it? */
 			bo = GET_BOVERFLOW(dbp, *ppg, i);
@@ -1934,13 +1987,13 @@ __bam_compact_dups(dbc, ppg, factor, have_lock, c_data, donep)
 
 		if (B_TYPE(bo->type) == B_OVERFLOW) {
 			if ((ret = __db_truncate_overflow(dbc,
-			    bo->pgno, have_lock ? NULL : ppg, c_data)) != 0)
+			    bo->pgno, have_lock ? NULL : ppg,
+			    c_data, pgs_donep)) != 0)
 				goto err;
-			(*donep)++;
 			continue;
 		}
 		if ((ret = __bam_compact_opd(dbc, bo->pgno,
-		    have_lock ? NULL : ppg, factor, c_data, donep)) != 0)
+		    have_lock ? NULL : ppg, factor, c_data, pgs_donep)) != 0)
 			goto err;
 	}
 
@@ -1955,13 +2008,13 @@ err:
  * PUBLIC:      db_pgno_t, PAGE **, u_int32_t, DB_COMPACT *, int *));
  */
 int
-__bam_compact_opd(dbc, root_pgno, ppg, factor, c_data, donep)
+__bam_compact_opd(dbc, root_pgno, ppg, factor, c_data, pgs_donep)
 	DBC *dbc;
 	db_pgno_t root_pgno;
 	PAGE **ppg;
 	u_int32_t factor;
 	DB_COMPACT *c_data;
-	int *donep;
+	int *pgs_donep;
 {
 	BTREE_CURSOR *cp;
 	DBC *opd;
@@ -2021,7 +2074,7 @@ __bam_compact_opd(dbc, root_pgno, ppg, factor, c_data, donep)
 		     NULL, factor, &span, c_data, &isdone)) != 0)
 			break;
 		/* For OPD the number of pages dirtied is returned in span. */
-		*donep += span;
+		*pgs_donep += span;
 	} while (!isdone);
 
 	if (start.data != NULL)
@@ -2041,11 +2094,12 @@ done:
  * The page is reference by the pg/indx passed in.
  */
 static int
-__bam_truncate_root_page(dbc, pg, indx, c_data)
+__bam_truncate_root_page(dbc, pg, indx, c_data, pgs_donep)
 	DBC *dbc;
 	PAGE *pg;
 	u_int32_t indx;
 	DB_COMPACT *c_data;
+	int *pgs_donep;
 {
 	BINTERNAL *bi;
 	BOVERFLOW *bo;
@@ -2053,8 +2107,8 @@ __bam_truncate_root_page(dbc, pg, indx, c_data)
 	db_pgno_t *pgnop;
 	u_int32_t tlen;
 
-	COMPQUIET(c_data, NULL);
 	COMPQUIET(bo, NULL);
+	COMPQUIET(c_data, NULL);
 	dbp = dbc->dbp;
 	if (TYPE(pg) == P_IBTREE) {
 		bi = GET_BINTERNAL(dbp, pg, indx);
@@ -2075,7 +2129,7 @@ __bam_truncate_root_page(dbc, pg, indx, c_data)
 
 	DB_ASSERT(dbp->env, IS_DIRTY(pg));
 
-	return (__db_truncate_root(dbc, pg, indx, pgnop, tlen));
+	return (__db_truncate_root(dbc, pg, indx, pgnop, tlen, pgs_donep));
 }
 
 /*
@@ -2086,10 +2140,11 @@ __bam_truncate_root_page(dbc, pg, indx, c_data)
  * nodes they will get copied adding pages to the database.
  */
 static int
-__bam_truncate_internal_overflow(dbc, page, c_data)
+__bam_truncate_internal_overflow(dbc, page, c_data, pgs_donep)
 	DBC *dbc;
 	PAGE *page;
 	DB_COMPACT *c_data;
+	int *pgs_donep;
 {
 	BINTERNAL *bi;
 	BOVERFLOW *bo;
@@ -2104,10 +2159,11 @@ __bam_truncate_internal_overflow(dbc, page, c_data)
 			continue;
 		bo = (BOVERFLOW *)(bi->data);
 		if (bo->pgno > c_data->compact_truncate && (ret =
-		     __bam_truncate_root_page(dbc, page, indx, c_data)) != 0)
+		     __bam_truncate_root_page(dbc, page,
+		     indx, c_data, pgs_donep)) != 0)
 			break;
-		if ((ret = __db_truncate_overflow(
-		     dbc, bo->pgno, NULL, c_data)) != 0)
+		if ((ret = __db_truncate_overflow(dbc,
+		    bo->pgno, NULL, c_data, pgs_donep)) != 0)
 			break;
 	}
 	return (ret);
@@ -2142,7 +2198,7 @@ __bam_compact_isdone(dbc, stop, pg, isdone)
 	} else {
 		DB_ASSERT(dbc->dbp->env, TYPE(pg) == P_LBTREE);
 		if ((ret = __bam_cmp(dbc, stop, pg, 0,
-		    t->bt_compare, &cmp)) != 0)
+		    t->bt_compare, &cmp, NULL)) != 0)
 			return (ret);
 
 		*isdone = cmp <= 0;
@@ -2328,7 +2384,7 @@ __bam_savekey(dbc, next, start)
 			if (len == 0) {
 no_key:				__db_errx(env, DB_STR("1023",
 				    "Compact cannot handle zero length key"));
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 		} else {
@@ -2360,14 +2416,15 @@ retry:	return (DB_LOCK_NOTGRANTED);
  *	Find high numbered pages in the internal nodes of a tree and
  *	swap them for lower numbered pages.
  * PUBLIC:  int __bam_truncate_ipages __P((DB *,
- * PUBLIC:    DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *));
+ * PUBLIC:    DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *, int *));
  */
 int
-__bam_truncate_ipages(dbp, ip, txn, c_data)
+__bam_truncate_ipages(dbp, ip, txn, c_data, pgs_donep)
 	DB *dbp;
 	DB_THREAD_INFO *ip;
 	DB_TXN *txn;
 	DB_COMPACT *c_data;
+	int *pgs_donep;
 {
 	BTMETA *meta;
 	BTREE *bt;
@@ -2480,8 +2537,9 @@ new_txn:
 		pgno = PGNO(cp->csp->page);
 
 		if (pgno > c_data->compact_truncate) {
-			if ((ret = __db_exchange_page(dbc, &cp->csp->page,
-			    NULL, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0)
+			if ((ret = __db_exchange_page(dbc,
+			    &cp->csp->page, NULL, PGNO_INVALID,
+			    DB_EXCH_DEFAULT, pgs_donep)) != 0)
 				goto err;
 		}
 
@@ -2561,7 +2619,8 @@ again:	if (F_ISSET(dbp, DB_AM_SUBDB) &&
 		}
 		if (PGNO(meta) > c_data->compact_truncate) {
 			dbmeta = (DBMETA *)meta;
-			ret = __db_move_metadata(dbc, &dbmeta, c_data);
+			ret = __db_move_metadata(dbc,
+			    &dbmeta, c_data, pgs_donep);
 			meta = (BTMETA *)dbmeta;
 			if (ret != 0)
 				goto err;
@@ -2583,8 +2642,8 @@ again:	if (F_ISSET(dbp, DB_AM_SUBDB) &&
 			 * page latch is released.
 			 */
 			++dbp->mpf->mfp->revision;
-			if ((ret = __db_exchange_page(dbc,
-			    &root, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+			if ((ret = __db_exchange_page(dbc, &root, NULL,
+			    PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
 				goto err;
 			if (PGNO(root) == bt->bt_root)
 				goto err;
diff --git a/src/btree/bt_compare.c b/src/btree/bt_compare.c
index 5c009071..8923c5fa 100644
--- a/src/btree/bt_compare.c
+++ b/src/btree/bt_compare.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -49,27 +49,39 @@
 
 /*
  * __bam_cmp --
- *	Compare a key to a given record.
+ *	Compare a key to a given record. We always start the comparison
+ *	at an offset and update the offset with the longest matching
+ *	count after the comparison.
  *
  * PUBLIC: int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t,
- * PUBLIC:    int (*)(DB *, const DBT *, const DBT *), int *));
+ * PUBLIC:    int (*)(DB *, const DBT *, const DBT *, size_t *),
+ * PUBLIC:    int *, size_t *));
  */
 int
-__bam_cmp(dbc, dbt, h, indx, func, cmpp)
+__bam_cmp(dbc, dbt, h, indx, func, cmpp, locp)
 	DBC *dbc;
 	const DBT *dbt;
 	PAGE *h;
 	u_int32_t indx;
-	int (*func)__P((DB *, const DBT *, const DBT *));
+	int (*func)__P((DB *, const DBT *, const DBT *, size_t *));
 	int *cmpp;
+	size_t *locp;
 {
+	BBLOB bl;
 	BINTERNAL *bi;
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
 	DB *dbp;
 	DBT pg_dbt;
+	off_t blob_size;
+	int ret;
+	db_seq_t blob_id;
 
 	dbp = dbc->dbp;
+	ret = 0;
+
+	/* Assert that func is non-NULL. */
+	DB_ASSERT(dbp->env, func != NULL);
 
 	/*
 	 * Returns:
@@ -91,11 +103,49 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp)
 		bk = GET_BKEYDATA(dbp, h, indx);
 		if (B_TYPE(bk->type) == B_OVERFLOW)
 			bo = (BOVERFLOW *)bk;
-		else {
+		else if (B_TYPE(bk->type) == B_BLOB) {
+			/*
+			 * This is very slow, but since blobs cannot be
+			 * in databases with duplicates or be keys, it should
+			 * only happen when using DB_GET_BOTH or DB_SET.
+			 */
+			memcpy(&bl, bk, BBLOB_SIZE);
+			memset(&pg_dbt, 0, sizeof(DBT));
+			GET_BLOB_SIZE(dbc->env, bl, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			if (blob_size > UINT32_MAX)
+				pg_dbt.size = UINT32_MAX;
+			else
+				pg_dbt.size = (u_int32_t)blob_size;
+			blob_id = (db_seq_t)bl.id;
+			pg_dbt.flags = DB_DBT_USERMEM;
+			if ((ret = __os_malloc(
+			    dbc->env, pg_dbt.size, &pg_dbt.data)) != 0)
+				return (ret);
+			pg_dbt.ulen = pg_dbt.size;
+			if ((ret = __blob_get(dbc,
+			    &pg_dbt, blob_id, blob_size, NULL, NULL)) != 0) {
+				__os_free(dbc->env, pg_dbt.data);
+				return (ret);
+			}
+			*cmpp = func(dbp, dbt, &pg_dbt, locp);
+			/*
+			 * There is no way to directly compare a blob file that
+			 * is greater in size than UINT32_MAX, so instead we
+			 * compare the data up to UINT32_MAX, and if they are
+			 * equal return that the blob is larger, since it is
+			 * longer than the input data.
+			 */
+			if (*cmpp == 0 && (blob_size > UINT32_MAX))
+				*cmpp = -1;
+			__os_free(dbc->env, pg_dbt.data);
+			return (0);
+		} else {
 			pg_dbt.app_data = NULL;
 			pg_dbt.data = bk->data;
 			pg_dbt.size = bk->len;
-			*cmpp = func(dbp, dbt, &pg_dbt);
+			*cmpp = func(dbp, dbt, &pg_dbt, locp);
 			return (0);
 		}
 		break;
@@ -123,13 +173,14 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp)
 		}
 
 		bi = GET_BINTERNAL(dbp, h, indx);
-		if (B_TYPE(bi->type) == B_OVERFLOW)
+		if (B_TYPE(bi->type) == B_OVERFLOW) {
+			DB_ASSERT(dbp->env, bi->len == BOVERFLOW_SIZE);
 			bo = (BOVERFLOW *)(bi->data);
-		else {
+		} else {
 			pg_dbt.app_data = NULL;
 			pg_dbt.data = bi->data;
 			pg_dbt.size = bi->len;
-			*cmpp = func(dbp, dbt, &pg_dbt);
+			*cmpp = func(dbp, dbt, &pg_dbt, locp);
 			return (0);
 		}
 		break;
@@ -141,42 +192,56 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp)
 	 * Overflow.
 	 */
 	return (__db_moff(dbc, dbt, bo->pgno, bo->tlen,
-	    func == __bam_defcmp ? NULL : func, cmpp));
+		    func == __bam_defcmp ? NULL : func, cmpp, locp));
 }
 
 /*
  * __bam_defcmp --
- *	Default comparison routine.
+ *	Default comparison routine.  Keep track of how far into the two
+ *	keys the bytes match, and use that as the offset at which to begin
+ *	future comparisons.  This saves the overhead of always starting
+ *	each comparison at the first byte.
  *
- * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *));
+ * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *, size_t *));
  */
 int
-__bam_defcmp(dbp, a, b)
+__bam_defcmp(dbp, a, b, locp)
 	DB *dbp;
 	const DBT *a, *b;
+	size_t *locp;
 {
-	size_t len;
+	size_t len, i, start;
 	u_int8_t *p1, *p2;
 
 	COMPQUIET(dbp, NULL);
-
+	start = (locp == NULL ? 0 : *locp);
 	/*
 	 * Returns:
 	 *	< 0 if a is < b
 	 *	= 0 if a is = b
 	 *	> 0 if a is > b
 	 *
+	 * If locp is non-NULL, start comparing at offset *locp and store the
+	 * offset of the first mismatch (or the shorter size) back in *locp.
+	 *
 	 * XXX
 	 * If a size_t doesn't fit into a long, or if the difference between
 	 * any two characters doesn't fit into an int, this routine can lose.
 	 * What we need is a signed integral type that's guaranteed to be at
 	 * least as large as a size_t, and there is no such thing.
 	 */
+	p1 = (u_int8_t *)a->data + start;
+	p2 = (u_int8_t *)b->data + start;
 	len = a->size > b->size ? b->size : a->size;
-	for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2)
-		if (*p1 != *p2)
-			return ((long)*p1 - (long)*p2);
-	return ((long)a->size - (long)b->size);
+	for (i = start; i < len; ++p1, ++p2, ++i)
+		if (*p1 != *p2) {
+			if (locp != NULL)
+				*locp = i;
+			return (*p1 < *p2 ? -1 : 1);
+		}
+	if (locp != NULL)
+		*locp = len;
+	return (a->size == b->size ? 0 : (a->size < b->size ? -1 : 1));
 }
 
 /*
diff --git a/src/btree/bt_compress.c b/src/btree/bt_compress.c
index 3f293461..479e7248 100644
--- a/src/btree/bt_compress.c
+++ b/src/btree/bt_compress.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #include "db_config.h"
@@ -352,16 +352,20 @@ __bam_compress_marshal_data(dbp, data, destbuf)
  * __bam_compress_dupcmp --
  *	Duplicate comparison function for compressed BTrees.
  *
- * PUBLIC: int __bam_compress_dupcmp __P((DB *, const DBT *, const DBT *));
+ * PUBLIC: int __bam_compress_dupcmp __P((DB *, const DBT *, const DBT *,
+ * PUBLIC:     size_t *));
  */
 int
-__bam_compress_dupcmp(db, a, b)
+__bam_compress_dupcmp(db, a, b, locp)
 	DB *db;
 	const DBT *a;
 	const DBT *b;
+	size_t *locp;
 {
 	DBT dcmp_a, dcmp_b;
 
+	COMPQUIET(locp, NULL);
+
 	/* Decompress the initial data in a */
 	CMP_UNMARSHAL_DATA(a, &dcmp_a);
 	dcmp_a.ulen = 0;
@@ -380,7 +384,7 @@ __bam_compress_dupcmp(db, a, b)
 
 	/* Call the user's duplicate compare function */
 	return ((BTREE *)db->bt_internal)->
-		compress_dup_compare(db, &dcmp_a, &dcmp_b);
+		compress_dup_compare(db, &dcmp_a, &dcmp_b, NULL);
 }
 
 /*
@@ -636,7 +640,7 @@ __bamc_next_decompress(dbc)
 	db = dbc->dbp;
 
 	if (cp->compcursor >= cp->compend)
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	cp->prevKey = cp->currentKey;
 	cp->prevData = cp->currentData;
@@ -1251,7 +1255,7 @@ __bamc_compress_merge_delete(dbc, stream, countp)
 					 * chunk, but don't delete any more
 					 * entries.
 					 */
-					bulk_ret = DB_NOTFOUND;
+					bulk_ret = DBC_ERR(dbc, DB_NOTFOUND);
 					moreStream = 0;
 					iSmallEnough = 0;
 				} else
@@ -1318,7 +1322,7 @@ __bamc_compress_merge_delete(dbc, stream, countp)
 	CMP_FREE_DBT(env, &nextk);
 	CMP_FREE_DBT(env, &nextc);
 
-	return (ret != 0 ? ret : bulk_ret);
+	return (ret != 0 ? ret : DBC_ERR(dbc, bulk_ret));
 }
 
 /*
@@ -1389,7 +1393,7 @@ __bamc_compress_merge_delete_dups(dbc, stream, countp)
 				 * in the database
 				 */
 				if (ifound == 0) {
-					bulk_ret = DB_NOTFOUND;
+					bulk_ret = DBC_ERR(dbc, DB_NOTFOUND);
 				} else
 					++chunk_count;
 				break;
@@ -1463,7 +1467,7 @@ __bamc_compress_merge_delete_dups(dbc, stream, countp)
 					 * current chunk, but don't delete
 					 * any more entries.
 					 */
-					bulk_ret = DB_NOTFOUND;
+					bulk_ret = DBC_ERR(dbc, DB_NOTFOUND);
 					moreStream = 0;
 					iSmallEnough = 0;
 				} else
@@ -1541,7 +1545,7 @@ __bamc_compress_merge_delete_dups(dbc, stream, countp)
 	CMP_FREE_DBT(env, &pdestdata);
 	CMP_FREE_DBT(env, &nextk);
 
-	return (ret != 0 ? ret : bulk_ret);
+	return (ret != 0 ? ret : DBC_ERR(dbc, bulk_ret));
 }
 
 /******************************************************************************/
@@ -1641,8 +1645,8 @@ __bamc_compress_get_prev_dup(dbc, flags)
 	if ((ret = __bamc_compress_get_prev(dbc, flags)) != 0)
 		return (ret);
 
-	if (t->bt_compare(dbp, cp->currentKey, &cp->del_key) != 0)
-		return (DB_NOTFOUND);
+	if (t->bt_compare(dbp, cp->currentKey, &cp->del_key, NULL) != 0)
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	return (0);
 }
@@ -1684,7 +1688,7 @@ __bamc_compress_get_prev_nodup(dbc, flags)
 	do
 		if ((ret = __bamc_compress_get_prev(dbc, flags)) != 0)
 			return (ret);
-	while (t->bt_compare(dbp, cp->currentKey, &cp->del_key) == 0);
+	while (t->bt_compare(dbp, cp->currentKey, &cp->del_key, NULL) == 0);
 
 	return (0);
 }
@@ -1702,7 +1706,7 @@ __bamc_compress_get_next(dbc, flags)
 
 	if (F_ISSET(cp, C_COMPRESS_DELETED)) {
 		if (cp->currentKey == 0)
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		F_CLR(cp, C_COMPRESS_DELETED);
 		return (0);
 	} else if (cp->currentKey) {
@@ -1722,7 +1726,7 @@ __bamc_compress_get_next(dbc, flags)
 		 * to the right place
 		 */
 		__bamc_compress_reset(dbc);
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 	} else if (ret != 0)
 		return (ret);
 
@@ -1753,17 +1757,18 @@ __bamc_compress_get_next_dup(dbc, key, flags)
 		 * deleted entry.
 		 */
 		if (cp->currentKey == 0)
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		F_CLR(cp, C_COMPRESS_DELETED);
-		return (t->bt_compare(dbp,
-		    cp->currentKey, &cp->del_key) == 0 ? 0 : DB_NOTFOUND);
+		return (t->bt_compare(dbp, cp->currentKey,
+		    &cp->del_key, NULL) == 0 ? 0 : DB_NOTFOUND);
 	} else if (cp->currentKey == 0)
 		return (EINVAL);
 
 	/* Check that the next entry has the same key as the previous entry */
 	ret = __bamc_next_decompress(dbc);
-	if (ret == 0 && t->bt_compare(dbp, cp->currentKey, cp->prevKey) != 0)
-		return (DB_NOTFOUND);
+	if (ret == 0 && t->bt_compare(dbp,
+	    cp->currentKey, cp->prevKey, NULL) != 0)
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 	if (ret != DB_NOTFOUND)
 		return (ret);
 
@@ -1783,7 +1788,7 @@ __bamc_compress_get_next_dup(dbc, key, flags)
 		 * will end up pointing to the right place
 		 */
 		__bamc_compress_reset(dbc);
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 	} else if (ret != 0)
 		return (ret);
 
@@ -1791,8 +1796,8 @@ __bamc_compress_get_next_dup(dbc, key, flags)
 		return (ret);
 
 	/* Check the keys are the same */
-	if (t->bt_compare(dbp, cp->currentKey, key) != 0)
-		return (DB_NOTFOUND);
+	if (t->bt_compare(dbp, cp->currentKey, key, NULL) != 0)
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	return (0);
 }
@@ -1828,7 +1833,7 @@ __bamc_compress_get_next_nodup(dbc, flags)
 	do
 		if ((ret = __bamc_compress_get_next(dbc, flags)) != 0)
 			return (ret);
-	while (t->bt_compare(dbp, cp->currentKey, &cp->del_key) == 0);
+	while (t->bt_compare(dbp, cp->currentKey, &cp->del_key, NULL) == 0);
 
 	return (ret);
 }
@@ -1888,14 +1893,14 @@ __bamc_compress_get_set(dbc, key, data, method, flags)
 		if (ret == 0 &&
 		    __db_compare_both(dbp, cp->currentKey, 0, key, 0) != 0) {
 			/* We didn't find the key */
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 		}
 		break;
 	case DB_GET_BOTH:
 		if (ret == 0 && (cmp != 0 || (!F_ISSET(dbp, DB_AM_DUPSORT) &&
-		    __bam_defcmp(dbp, cp->currentData, data) != 0))) {
+		    __bam_defcmp(dbp, cp->currentData, data, NULL) != 0))) {
 			/* We didn't find the key/data pair */
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 		}
 		break;
 	default:
@@ -1923,7 +1928,7 @@ __bamc_compress_get_bothc(dbc, data, flags)
 	   position */
 	if (__db_compare_both(dbp, cp->currentKey,
 	    cp->currentData, cp->currentKey, data) >= 0)
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	cmp = 0;
 	/* Perform a linear search for the data in the current chunk */
@@ -1933,7 +1938,7 @@ __bamc_compress_get_bothc(dbc, data, flags)
 		continue;
 
 	if (ret == 0)
-		return (cmp == 0 ? 0 : DB_NOTFOUND);
+		return (cmp == 0 ? 0 : DBC_ERR(dbc, DB_NOTFOUND));
 	if (ret != DB_NOTFOUND)
 		return (ret);
 
@@ -2277,7 +2282,7 @@ __bamc_compress_iput(dbc, key, data, flags)
 	switch (flags) {
 	case DB_CURRENT:
 		if (cp->currentKey == 0 || F_ISSET(cp, C_COMPRESS_DELETED)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto end;
 		}
 
@@ -2290,7 +2295,7 @@ __bamc_compress_iput(dbc, key, data, flags)
 
 		if (F_ISSET(dbp, DB_AM_DUPSORT) &&
 		    ((BTREE *)dbp->bt_internal)->compress_dup_compare(
-		    dbp, cp->currentData, data) != 0) {
+		    dbp, cp->currentData, data, NULL) != 0) {
 			__db_errx(env, DB_STR("1032",
 			    "Existing data sorts differently from put data"));
 			ret = EINVAL;
@@ -2464,7 +2469,7 @@ __bamc_compress_idel(dbc, flags)
 	if (F_ISSET(cp, C_COMPRESS_DELETED))
 		return DB_KEYEMPTY;
 	if (cp->currentKey == 0)
-		return DB_NOTFOUND;
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key,
 		     cp->currentKey->data, cp->currentKey->size)) != 0)
@@ -3015,7 +3020,8 @@ __bam_compress_count(dbc, nkeysp, ndatap)
 		if (ret != 0)
 			goto err;
 
-		if (t->bt_compare(dbp, cp_n->currentKey, cp_n->prevKey) != 0)
+		if (t->bt_compare(dbp,
+		    cp_n->currentKey, cp_n->prevKey, NULL) != 0)
 			nkeys += 1;
 	}
 
diff --git a/src/btree/bt_conv.c b/src/btree/bt_conv.c
index 348ce5c2..85baeed8 100644
--- a/src/btree/bt_conv.c
+++ b/src/btree/bt_conv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -88,7 +88,12 @@ __bam_mswap(env, pg)
 	SWAP32(p);		/* re_len */
 	SWAP32(p);		/* re_pad */
 	SWAP32(p);		/* root */
-	p += 92 * sizeof(u_int32_t); /* unused */
+	SWAP32(p);		/* threshold */
+	SWAP32(p);		/* file id lo */
+	SWAP32(p);		/* file id hi */
+	SWAP32(p);		/* sdb id lo */
+	SWAP32(p);		/* sdb id hi */
+	p += 87 * sizeof(u_int32_t); /* unused */
 	SWAP32(p);		/* crypto_magic */
 
 	return (0);
diff --git a/src/btree/bt_curadj.c b/src/btree/bt_curadj.c
index 78606009..d3398ee8 100644
--- a/src/btree/bt_curadj.c
+++ b/src/btree/bt_curadj.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index 860c31ce..d63b7373 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -938,7 +938,7 @@ __bamc_get(dbc, key, data, flags, pgnop)
 	case DB_CURRENT:
 		/* It's not possible to return a deleted record. */
 		if (F_ISSET(cp, C_DELETED)) {
-			ret = DB_KEYEMPTY;
+			ret = DBC_ERR(dbc, DB_KEYEMPTY);
 			goto err;
 		}
 
@@ -979,7 +979,7 @@ __bamc_get(dbc, key, data, flags, pgnop)
 				goto err;
 			if (flags == DB_GET_BOTH) {
 				if (!exact) {
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 					goto err;
 				}
 				break;
@@ -1000,7 +1000,7 @@ __bamc_get(dbc, key, data, flags, pgnop)
 			    dbc, PGNO_INVALID, key, flags, &exact)) != 0)
 				return (ret);
 			if (!exact) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 
@@ -1047,7 +1047,7 @@ __bamc_get(dbc, key, data, flags, pgnop)
 		if ((ret = __bamc_next(dbc, 1, 0)) != 0)
 			goto err;
 		if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		break;
@@ -1077,7 +1077,7 @@ __bamc_get(dbc, key, data, flags, pgnop)
 		if ((ret = __bamc_prev(dbc)) != 0)
 			goto err;
 		if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		break;
@@ -1173,12 +1173,15 @@ __bam_bulk(dbc, data, flags)
 	DBT *data;
 	u_int32_t flags;
 {
+	BBLOB bl;
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
 	BTREE_CURSOR *cp;
 	PAGE *pg;
 	db_indx_t *inp, indx, pg_keyoff;
 	int32_t  *endp, key_off, *offp, *saveoffp;
+	off_t blob_size;
+	db_seq_t blob_id;
 	u_int8_t *dbuf, *dp, *np;
 	u_int32_t key_size, pagesize, size, space;
 	int adj, is_key, need_pg, next_key, no_dup, rec_key, ret;
@@ -1279,6 +1282,7 @@ next_pg:
 		 */
 		if (is_key && pg_keyoff != inp[indx]) {
 			bk = GET_BKEYDATA(dbc->dbp, pg, indx);
+			DB_ASSERT(dbc->env, B_TYPE(bk->type) != B_BLOB);
 			if (B_TYPE(bk->type) == B_OVERFLOW) {
 				bo = (BOVERFLOW *)bk;
 				size = key_size = bo->tlen;
@@ -1403,6 +1407,31 @@ get_key_space:
 			*offp-- = (int32_t)(np - dbuf);
 			np += size;
 			*offp-- = (int32_t)size;
+		} else if (B_TYPE(bk->type) == B_BLOB) {
+			blob_size = 0;
+			blob_id = 0;
+			memcpy(&bl, bk, BBLOB_SIZE);
+			GET_BLOB_SIZE(dbc->env, bl, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			if (blob_size > UINT32_MAX) {
+				size = UINT32_MAX;
+				goto back_up;
+			}
+			size = (u_int32_t)blob_size;
+			if (size > space)
+				goto back_up;
+			blob_id = (db_seq_t)bl.id;
+			if ((ret = __blob_bulk(dbc, size, blob_id, np)) != 0)
+				return (ret);
+			if (is_key) {
+				*offp-- = (int32_t)key_off;
+				*offp-- = (int32_t)key_size;
+			}
+			space -= size;
+			*offp-- = (int32_t)(np - dbuf);
+			np += size;
+			*offp-- = (int32_t)size;
 		} else {
 			if (need_pg) {
 				dp = np;
@@ -1764,11 +1793,11 @@ __bam_getbothc(dbc, data)
 		 */
 		if ((ret = __bam_cmp(dbc, data, cp->page, cp->indx,
 		    dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare,
-		    &cmp)) != 0)
+		    &cmp, NULL)) != 0)
 			return (ret);
 
 		if (cmp <= 0)
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 
 		/* Discard the current page, we're going to do a full search. */
 		if ((ret = __memp_fput(mpf,
@@ -1791,7 +1820,7 @@ __bam_getbothc(dbc, data)
 	 */
 	if (cp->indx + P_INDX >= NUM_ENT(cp->page) ||
 	    !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX))
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 	cp->indx += P_INDX;
 
 	return (__bam_getboth_finddatum(dbc, data, DB_GET_BOTH));
@@ -1842,7 +1871,7 @@ __bam_getlte(dbc, key, data)
 
 		/* Check if we're still on the correct key */
 		if ((ret = __bam_cmp(dbc, key, cp->page, cp->indx,
-		    ((BTREE*)dbp->bt_internal)->bt_compare, &exact)) != 0)
+		    ((BTREE*)dbp->bt_internal)->bt_compare, &exact, NULL)) != 0)
 			goto end;
 		exact = (exact == 0);
 	}
@@ -1884,8 +1913,8 @@ __bam_getlte(dbc, key, data)
 			if (data != NULL) {
 				/* Check if we're still on the correct data */
 				if ((ret = __bam_cmp(
-					    dbc, data, ocp->page, ocp->indx,
-					    dbp->dup_compare, &exact)) != 0)
+				    dbc, data, ocp->page, ocp->indx,
+				    dbp->dup_compare, &exact, NULL)) != 0)
 					goto end;
 				exact = (exact == 0);
 			} else
@@ -1915,7 +1944,8 @@ __bam_getlte(dbc, key, data)
 		else {
 			/* Check if we're still on the correct data */
 			if ((ret = __bam_cmp(dbc, data, cp->page,
-			    cp->indx + O_INDX, dbp->dup_compare, &exact)) != 0)
+			    cp->indx + O_INDX, dbp->dup_compare,
+			    &exact, NULL)) != 0)
 				goto end;
 			exact = (exact == 0);
 		}
@@ -1982,7 +2012,7 @@ __bam_getboth_finddatum(dbc, data, flags)
 			if (!IS_CUR_DELETED(dbc)) {
 				if ((ret = __bam_cmp(
 				    dbc, data, cp->page, cp->indx + O_INDX,
-				    __bam_defcmp, &cmp)) != 0)
+				    __bam_defcmp, &cmp, NULL)) != 0)
 					return (ret);
 				if (cmp == 0)
 					return (0);
@@ -1992,7 +2022,8 @@ __bam_getboth_finddatum(dbc, data, flags)
 			    !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX))
 				break;
 		}
-		return (DB_NOTFOUND);
+
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 	}
 
 	/*
@@ -2008,18 +2039,18 @@ __bam_getboth_finddatum(dbc, data, flags)
 			break;
 	if (base == (top - P_INDX)) {
 		if  ((ret = __bam_cmp(dbc, data, cp->page,
-		    cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+		    cp->indx + O_INDX, dbp->dup_compare, &cmp, NULL)) != 0)
 			return (ret);
 		if (cmp == 0 || (cmp < 0 && flags == DB_GET_BOTH_RANGE))
 			return (0);
 		cp->indx = top;
-		return DB_NOTFOUND;
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 	}
 
 	for (lim = (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) {
 		cp->indx = base + ((lim >> 1) * P_INDX);
 		if ((ret = __bam_cmp(dbc, data, cp->page,
-		    cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+		    cp->indx + O_INDX, dbp->dup_compare, &cmp, NULL)) != 0)
 			return (ret);
 		if (cmp == 0) {
 			/*
@@ -2039,7 +2070,7 @@ __bam_getboth_finddatum(dbc, data, flags)
 
 	/* No match found; if we're looking for an exact match, we're done. */
 	if (flags == DB_GET_BOTH)
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	/*
 	 * Base is the smallest index greater than the data item, may be zero
@@ -2049,7 +2080,7 @@ __bam_getboth_finddatum(dbc, data, flags)
 	cp->indx = base;
 	while (cp->indx < top && IS_CUR_DELETED(dbc))
 		cp->indx += P_INDX;
-	return (cp->indx < top ? 0 : DB_NOTFOUND);
+	return (cp->indx < top ? 0 : DBC_ERR(dbc, DB_NOTFOUND));
 }
 
 /*
@@ -2082,7 +2113,7 @@ split:	ret = stack = 0;
 	switch (flags) {
 	case DB_CURRENT:
 		if (F_ISSET(cp, C_DELETED))
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		/* FALLTHROUGH */
 	case DB_AFTER:
 	case DB_BEFORE:
@@ -2206,7 +2237,8 @@ split:	ret = stack = 0;
 		 */
 		for (;; cp->indx += P_INDX) {
 			if ((ret = __bam_cmp(dbc, data, cp->page,
-			    cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+			    cp->indx + O_INDX, dbp->dup_compare,
+			    &cmp, NULL)) != 0)
 				goto err;
 			if (cmp < 0) {
 				iiop = DB_BEFORE;
@@ -2479,7 +2511,7 @@ __bamc_next(dbc, initial_move, deleted_okay)
 		 */
 		if (cp->indx >= NUM_ENT(cp->page)) {
 			if ((pgno = NEXT_PGNO(cp->page)) == PGNO_INVALID)
-				return (DB_NOTFOUND);
+				return (DBC_ERR(dbc, DB_NOTFOUND));
 
 			ACQUIRE_CUR(dbc, lock_mode, pgno, 0, ret);
 			if (ret != 0)
@@ -2539,7 +2571,7 @@ __bamc_prev(dbc)
 		if (cp->indx == 0) {
 			if ((pgno =
 			    PREV_PGNO(cp->page)) == PGNO_INVALID)
-				return (DB_NOTFOUND);
+				return (DBC_ERR(dbc, DB_NOTFOUND));
 
 			ACQUIRE_CUR(dbc, lock_mode, pgno, 0, ret);
 			if (ret != 0)
@@ -2711,11 +2743,11 @@ __bamc_search(dbc, root_pgno, key, flags, exactp)
 	if (h->next_pgno == PGNO_INVALID) {
 		indx = NUM_ENT(h) - P_INDX;
 		if ((ret = __bam_cmp(dbc, key, h, indx,
-		    t->bt_compare, &cmp)) != 0)
+		    t->bt_compare, &cmp, NULL)) != 0)
 			goto fast_miss;
 		if (cmp > 0) {
 			if (FLD_ISSET(sflags, SR_EXACT))
-				return (DB_NOTFOUND);
+				return (DBC_ERR(dbc, DB_NOTFOUND));
 			else
 				indx += P_INDX;
 		}
@@ -2725,10 +2757,10 @@ __bamc_search(dbc, root_pgno, key, flags, exactp)
 	if (h->prev_pgno == PGNO_INVALID) {
 		indx = 0;
 		if ((ret = __bam_cmp(dbc, key, h, indx,
-		    t->bt_compare, &cmp)) != 0)
+		    t->bt_compare, &cmp, NULL)) != 0)
 			goto fast_miss;
 		if (cmp < 0 && FLD_ISSET(sflags, SR_EXACT))
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		if (cmp <= 0)
 			goto fast_hit;
 	}
@@ -2736,7 +2768,7 @@ __bamc_search(dbc, root_pgno, key, flags, exactp)
 		DB_BINARY_SEARCH_FOR(base, lim, NUM_ENT(h), P_INDX) {
 			DB_BINARY_SEARCH_INCR(indx, base, lim, P_INDX);
 			if ((ret = __bam_cmp(dbc, key, h, indx,
-			    t->bt_compare, &cmp)) != 0)
+			    t->bt_compare, &cmp, NULL)) != 0)
 				goto fast_miss;
 
 			if (cmp == 0)
@@ -2752,7 +2784,7 @@ __bamc_search(dbc, root_pgno, key, flags, exactp)
 		indx = base;
 		if (indx > 0 && indx < NUM_ENT(h)) {
 			if (FLD_ISSET(sflags, SR_EXACT))
-				return (DB_NOTFOUND);
+				return (DBC_ERR(dbc, DB_NOTFOUND));
 			goto fast_hit;
 		}
 	}
@@ -3068,7 +3100,7 @@ __bam_opd_exists(dbc, pgno)
 	if (NUM_ENT(h) == 0)
 		ret = 0;
 	else
-		ret = DB_KEYEXIST;
+		ret = DBC_ERR(dbc, DB_KEYEXIST);
 
 	(void)__memp_fput(dbc->dbp->mpf, dbc->thread_info, h, dbc->priority);
 
diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c
index 37496b3f..a1ccef71 100644
--- a/src/btree/bt_delete.c
+++ b/src/btree/bt_delete.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -61,15 +61,18 @@ __bam_ditem(dbc, h, indx)
 	PAGE *h;
 	u_int32_t indx;
 {
+	BBLOB bl;
 	BINTERNAL *bi;
 	BKEYDATA *bk;
 	DB *dbp;
+	db_seq_t blob_id;
 	u_int32_t nbytes;
 	int ret;
 	db_indx_t *inp;
 
 	dbp = dbc->dbp;
 	inp = P_INP(dbp, h);
+	ret = 0;
 
 	/* The page should already have been dirtied by our caller. */
 	DB_ASSERT(dbp->env, IS_DIRTY(h));
@@ -139,6 +142,13 @@ __bam_ditem(dbc, h, indx)
 			    dbc, (GET_BOVERFLOW(dbp, h, indx))->pgno)) != 0)
 				return (ret);
 			break;
+		case B_BLOB:
+			nbytes = BBLOB_SIZE;
+			memcpy(&bl, bk, BBLOB_SIZE);
+			blob_id = (db_seq_t)bl.id;
+			if ((ret = __blob_del(dbc, blob_id)) != 0)
+				return (ret);
+			break;
 		case B_KEYDATA:
 			nbytes = BKEYDATA_SIZE(bk->len);
 			break;
@@ -241,7 +251,7 @@ __bam_dpages(dbc, use_top, flags)
 	 * single item deleted, and the rest of the pages are to be removed.
 	 *
 	 * Recno always has a stack to the root and __bam_merge operations
-	 * may have unneeded items in the sack.  We find the lowest page
+	 * may have unneeded items in the stack.  We find the lowest page
 	 * in the stack that has more than one record in it and start there.
 	 */
 	ret = 0;
@@ -493,7 +503,9 @@ stop:			done = 1;
 
 /*
  * __bam_pupdate --
- *	Update parent key pointers up the tree.
+ *	Update parent key pointers up the tree after putting a new key
+ *	at the start of a leaf page.
+ *
  *
  * PUBLIC: int __bam_pupdate __P((DBC *, PAGE *));
  */
diff --git a/src/btree/bt_method.c b/src/btree/bt_method.c
index 5cf93d2e..2fb33be2 100644
--- a/src/btree/bt_method.c
+++ b/src/btree/bt_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -15,7 +15,7 @@
 
 static int __bam_set_bt_minkey __P((DB *, u_int32_t));
 static int __bam_get_bt_compare
-	       __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+	       __P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
 static int __bam_get_bt_prefix
 	       __P((DB *, size_t(**)(DB *, const DBT *, const DBT *)));
 static int __bam_set_bt_prefix
@@ -233,7 +233,7 @@ incompat:
 static int
 __bam_get_bt_compare(dbp, funcp)
 	DB *dbp;
-	int (**funcp) __P((DB *, const DBT *, const DBT *));
+	int (**funcp) __P((DB *, const DBT *, const DBT *, size_t *));
 {
 	BTREE *t;
 
@@ -251,13 +251,13 @@ __bam_get_bt_compare(dbp, funcp)
  * __bam_set_bt_compare --
  *	Set the comparison function.
  *
- * PUBLIC: int __bam_set_bt_compare
- * PUBLIC:         __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ * PUBLIC: int __bam_set_bt_compare __P((DB *,
+ * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, size_t *)));
  */
 int
 __bam_set_bt_compare(dbp, func)
 	DB *dbp;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 {
 	BTREE *t;
 
@@ -351,6 +351,13 @@ __bam_set_bt_compress(dbp, compress, decompress)
 		return (EINVAL);
 	}
 
+	/* Compression is incompatible with blob storage. */
+	if (dbp->blob_threshold > 0) {
+		__db_errx(dbp->env, DB_STR("1198",
+		    "compression cannot be used with blobs enabled."));
+		return (EINVAL);
+	}
+
 	if (compress != 0 && decompress != 0) {
 		t->bt_compress = compress;
 		t->bt_decompress = decompress;
diff --git a/src/btree/bt_open.c b/src/btree/bt_open.c
index 7be141c1..46a866d0 100644
--- a/src/btree/bt_open.c
+++ b/src/btree/bt_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -44,6 +44,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/crypto.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_swap.h"
@@ -119,6 +120,7 @@ __bam_metachk(dbp, name, btm)
 	int ret;
 
 	env = dbp->env;
+	ret = 0;
 
 	/*
 	 * At this point, all we know is that the magic number is for a Btree.
@@ -136,6 +138,7 @@ __bam_metachk(dbp, name, btm)
 		return (DB_OLD_VERSION);
 	case 8:
 	case 9:
+	case 10:
 		break;
 	default:
 		__db_errx(env, DB_STR_A("1009",
@@ -269,6 +272,29 @@ __bam_metachk(dbp, name, btm)
 	/* Set the page size. */
 	dbp->pgsize = btm->dbmeta.pagesize;
 
+	dbp->blob_threshold = btm->blob_threshold;
+	GET_BLOB_FILE_ID(env, btm, dbp->blob_file_id, ret);
+	if (ret != 0)
+		return (ret);
+	GET_BLOB_SDB_ID(env, btm, dbp->blob_sdb_id, ret);
+	if (ret != 0)
+		return (ret);
+	/* Blob databases must be upgraded. */
+	if (vers == 9 && (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0)) {
+	    __db_errx(env, DB_STR_A("1207",
+"%s: databases that support blobs must be upgraded.", "%s"),
+		    name);
+		return (EINVAL);
+	}
+#ifndef HAVE_64BIT_TYPES
+	if (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0) {
+		__db_errx(env, DB_STR_A("1199",
+		    "%s: blobs require 64 integer compiler support.", "%s"),
+		    name);
+		return (DB_OPNOTSUP);
+	}
+#endif
+
 	/* Copy the file's ID. */
 	memcpy(dbp->fileid, btm->dbmeta.uid, DB_FILE_ID_LEN);
 
@@ -442,6 +468,9 @@ __bam_init_meta(dbp, meta, pgno, lsnp)
 	meta->minkey = t->bt_minkey;
 	meta->re_len = t->re_len;
 	meta->re_pad = (u_int32_t)t->re_pad;
+	meta->blob_threshold = dbp->blob_threshold;
+	SET_BLOB_META_FILE_ID(meta, dbp->blob_file_id, BTMETA);
+	SET_BLOB_META_SDB_ID(meta, dbp->blob_sdb_id, BTMETA);
 
 #ifdef HAVE_PARTITION
 	if ((part = dbp->p_internal) != NULL) {
@@ -535,6 +564,12 @@ __bam_new_file(dbp, ip, txn, fhp, name)
 		pginfo.type = dbp->type;
 		pdbt.data = &pginfo;
 		pdbt.size = sizeof(pginfo);
+		if (dbp->blob_threshold) {
+			if ((ret = __blob_generate_dir_ids(dbp, txn,
+			    &dbp->blob_file_id)) != 0)
+				return (ret);
+
+		}
 		if ((ret = __os_calloc(env, 1, dbp->pgsize, &buf)) != 0)
 			return (ret);
 		meta = (BTMETA *)buf;
@@ -613,6 +648,12 @@ __bam_new_subdb(mdbp, dbp, ip, txn)
 	meta = NULL;
 	root = NULL;
 
+	if (dbp->blob_threshold) {
+		if ((ret = __blob_generate_dir_ids(dbp, txn,
+		    &dbp->blob_sdb_id)) != 0)
+			return (ret);
+	}
+
 	if ((ret = __db_cursor(mdbp, ip, txn,
 	    &dbc, CDB_LOCKING(env) ?  DB_WRITECURSOR : 0)) != 0)
 		return (ret);
diff --git a/src/btree/bt_put.c b/src/btree/bt_put.c
index 13316181..5cd0ac12 100644
--- a/src/btree/bt_put.c
+++ b/src/btree/bt_put.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -56,8 +56,8 @@ static int __bam_dup_check __P((DBC *, u_int32_t,
 static int __bam_dup_convert __P((DBC *, PAGE *, u_int32_t, u_int32_t));
 static int __bam_ovput
 	       __P((DBC *, u_int32_t, db_pgno_t, PAGE *, u_int32_t, DBT *));
-static u_int32_t
-	   __bam_partsize __P((DB *, u_int32_t, DBT *, PAGE *, u_int32_t));
+static int __bam_partsize
+		__P((DB *, u_int32_t, DBT *, PAGE *, u_int32_t, u_int32_t *));
 
 /*
  * __bam_iitem --
@@ -71,18 +71,22 @@ __bam_iitem(dbc, key, data, op, flags)
 	DBT *key, *data;
 	u_int32_t op, flags;
 {
+	BBLOB bl, blob_buf;
 	BKEYDATA *bk, bk_tmp;
 	BTREE *t;
 	BTREE_CURSOR *cp;
 	DB *dbp;
-	DBT bk_hdr, tdbt;
+	DBT bk_hdr, blob_dbt, tdbt;
 	DB_MPOOLFILE *mpf;
 	ENV *env;
+	DB_LSN lsn;
 	PAGE *h;
 	db_indx_t cnt, indx;
+	off_t blob_size;
+	db_seq_t blob_id, new_blob_id;
 	u_int32_t data_size, have_bytes, need_bytes, needed, pages, pagespace;
 	char tmp_ch;
-	int cmp, bigkey, bigdata, del, dupadjust;
+	int cmp, bigkey, bigdata, blobdata, del, dupadjust;
 	int padrec, replace, ret, t_ret, was_deleted;
 
 	COMPQUIET(cnt, 0);
@@ -95,6 +99,7 @@ __bam_iitem(dbc, key, data, op, flags)
 	h = cp->page;
 	indx = cp->indx;
 	del = dupadjust = replace = was_deleted = 0;
+	blobdata = 0;
 
 	/*
 	 * Fixed-length records with partial puts: it's an error to specify
@@ -112,8 +117,12 @@ __bam_iitem(dbc, key, data, op, flags)
 	 * longer than the fixed-length, and we never require less than
 	 * the fixed-length record size.
 	 */
-	data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
-	    __bam_partsize(dbp, op, data, h, indx) : data->size;
+	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+		if ((ret = __bam_partsize(
+		    dbp, op, data, h, indx, &data_size)) != 0)
+			return (ret);
+	} else
+		data_size = data->size;
 	padrec = 0;
 	if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
 		if (data_size > t->re_len)
@@ -190,6 +199,13 @@ __bam_iitem(dbc, key, data, op, flags)
 	}
 	if (!F_ISSET(data, DB_DBT_STREAMING) &&
 	    (padrec || F_ISSET(data, DB_DBT_PARTIAL))) {
+		/* Partial puts need to be handled in the blob functions. */
+		if (op == DB_CURRENT) {
+			bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ?
+			    O_INDX : 0));
+			if (B_TYPE(bk->type) == B_BLOB)
+				goto dup_cmp;
+		}
 		tdbt = *data;
 		if ((ret =
 		    __bam_build(dbc, op, &tdbt, h, indx, data_size)) != 0)
@@ -204,10 +220,10 @@ __bam_iitem(dbc, key, data, op, flags)
 	 * screwing up the duplicate sort order.  We have to do this after
 	 * we build the real record so that we're comparing the real items.
 	 */
-	if (op == DB_CURRENT && dbp->dup_compare != NULL) {
+dup_cmp:if (op == DB_CURRENT && dbp->dup_compare != NULL) {
 		if ((ret = __bam_cmp(dbc, data, h,
 		    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0),
-		    dbp->dup_compare, &cmp)) != 0)
+		    dbp->dup_compare, &cmp, NULL)) != 0)
 			return (ret);
 		if (cmp != 0) {
 			__db_errx(env, DB_STR("1004",
@@ -218,10 +234,30 @@ __bam_iitem(dbc, key, data, op, flags)
 
 	/*
 	 * If the key or data item won't fit on a page, we'll have to store
-	 * them on overflow pages.
+	 * them on overflow pages.  The exception is if we are inserting
+	 * into an existing blob file, in that case it remains a blob
+	 * file regardless of its new size.
 	 */
+	if (op == DB_CURRENT) {
+		bk = GET_BKEYDATA(
+		    dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+		if (B_TYPE(bk->type) == B_BLOB) {
+			blobdata = 1;
+			bigdata = 0;
+		} else
+			bigdata = data_size > cp->ovflsize;
+	} else {
+		if (dbp->blob_threshold &&
+		    (dbp->blob_threshold <= data_size ||
+		    F_ISSET(data, DB_DBT_BLOB))) {
+			blobdata = 1;
+			bigdata = 0;
+		} else {
+			blobdata = 0;
+			bigdata = data_size > cp->ovflsize;
+		}
+	}
 	needed = 0;
-	bigdata = data_size > cp->ovflsize;
 	switch (op) {
 	case DB_KEYFIRST:
 		/* We're adding a new key and data pair. */
@@ -232,6 +268,8 @@ __bam_iitem(dbc, key, data, op, flags)
 			needed += BKEYDATA_PSIZE(key->size);
 		if (bigdata)
 			needed += BOVERFLOW_PSIZE;
+		else if (blobdata)
+			needed += BBLOB_PSIZE;
 		else
 			needed += BKEYDATA_PSIZE(data_size);
 		break;
@@ -254,6 +292,8 @@ __bam_iitem(dbc, key, data, op, flags)
 			    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
 			if (B_TYPE(bk->type) == B_KEYDATA)
 				have_bytes = BKEYDATA_PSIZE(bk->len);
+			else if (B_TYPE(bk->type) == B_BLOB)
+				have_bytes = BBLOB_PSIZE;
 			else
 				have_bytes = BOVERFLOW_PSIZE;
 			need_bytes = 0;
@@ -263,6 +303,8 @@ __bam_iitem(dbc, key, data, op, flags)
 		}
 		if (bigdata)
 			need_bytes += BOVERFLOW_PSIZE;
+		else if (blobdata)
+			need_bytes += BBLOB_PSIZE;
 		else
 			need_bytes += BKEYDATA_PSIZE(data_size);
 
@@ -405,7 +447,8 @@ __bam_iitem(dbc, key, data, op, flags)
 		 * because we're going to immediately re-add the item into the
 		 * same slot.
 		 */
-		if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
+		if (bigdata || (B_TYPE(bk->type) != B_KEYDATA &&
+		    B_TYPE(bk->type) != B_BLOB)) {
 			/*
 			 * If streaming, don't delete the overflow item,
 			 * just delete the item pointing to the overflow item.
@@ -448,13 +491,65 @@ __bam_iitem(dbc, key, data, op, flags)
 			bk_hdr.size = SSZA(BKEYDATA, data);
 			ret = __db_pitem(dbc, h, indx,
 			    BKEYDATA_SIZE(data->size), &bk_hdr, data);
-		} else if (replace)
-			ret = __bam_ritem(dbc, h, indx, data, 0);
-		else
-			ret = __db_pitem(dbc, h, indx,
-			    BKEYDATA_SIZE(data->size), NULL, data);
+		} else if (replace) {
+			/*
+			 * If updating a blob, replace the blob file with the
+			 * new blob data and update the blob db record.
+			 */
+			if (blobdata) {
+				memcpy(&bl,
+				    P_ENTRY(dbp, h, indx), BBLOB_SIZE);
+				memset(&blob_dbt, 0, sizeof(DBT));
+				blob_dbt.size = BBLOB_DSIZE;
+				if (F_ISSET(data, DB_DBT_BLOB_REC)) {
+					/*
+					 * Replace the blob record with the
+					 * blob record in the data DBT.
+					 */
+					blob_dbt.data = BBLOB_DATA(data->data);
+				} else {
+					blob_id = (db_seq_t)bl.id;
+					GET_BLOB_SIZE(
+					    dbp->env, bl, blob_size, ret);
+					if (ret != 0)
+						goto err;
+					if ((ret = __blob_repl(
+					    dbc, data, blob_id,
+					    &new_blob_id, &blob_size)) != 0)
+						goto err;
+					blob_dbt.data = BBLOB_DATA((&bl));
+					SET_BLOB_ID(&bl, new_blob_id, BBLOB);
+					SET_BLOB_SIZE(&bl, blob_size, BBLOB);
+				}
+				ret = __bam_ritem(
+				    dbc, h, indx, &blob_dbt, B_BLOB);
+			} else
+				ret = __bam_ritem(dbc, h, indx, data, 0);
+		} else
+			if (blobdata) {
+				new_blob_id = 0;
+				blob_size = 0;
+				if ((ret = __blob_put(dbc, data,
+				    &new_blob_id, &blob_size, &lsn)) != 0)
+					goto err;
+				memset(&blob_buf, 0, BBLOB_SIZE);
+				blob_buf.type = B_BLOB;
+				blob_buf.len = BBLOB_DSIZE;
+				tdbt.data = &blob_buf;
+				tdbt.size = BBLOB_SIZE;
+				SET_BLOB_ID(&blob_buf, new_blob_id, BBLOB);
+				SET_BLOB_SIZE(&blob_buf, blob_size, BBLOB);
+				SET_BLOB_FILE_ID(
+				    &blob_buf, dbp->blob_file_id, BBLOB);
+				SET_BLOB_SDB_ID(
+				    &blob_buf, dbp->blob_sdb_id, BBLOB);
+				ret = __db_pitem(dbc, h,
+				    indx, BBLOB_SIZE, &tdbt, NULL);
+			} else
+				ret = __db_pitem(dbc, h, indx,
+				    BKEYDATA_SIZE(data->size), NULL, data);
 	}
-	if (ret != 0) {
+err:	if (ret != 0) {
 		if (del == 1 && (t_ret =
 		     __bam_ca_di(dbc, PGNO(h), indx + 1, -1)) != 0) {
 			__db_err(env, t_ret, DB_STR("1005",
@@ -504,32 +599,61 @@ __bam_iitem(dbc, key, data, op, flags)
  * __bam_partsize --
  *	Figure out how much space a partial data item is in total.
  */
-static u_int32_t
-__bam_partsize(dbp, op, data, h, indx)
+static int
+__bam_partsize(dbp, op, data, h, indx, data_size)
 	DB *dbp;
 	u_int32_t op, indx;
 	DBT *data;
 	PAGE *h;
+	u_int32_t *data_size;
 {
+	BBLOB bl;
 	BKEYDATA *bk;
+	int ret;
+	off_t blob_size;
 	u_int32_t nbytes;
 
+	ret = 0;
+
 	/*
 	 * If the record doesn't already exist, it's simply the data we're
 	 * provided.
 	 */
-	if (op != DB_CURRENT)
-		return (data->doff + data->size);
+	if (op != DB_CURRENT) {
+		*data_size = data->doff + data->size;
+		return (0);
+	}
 
 	/*
 	 * Otherwise, it's the data provided plus any already existing data
 	 * that we're not replacing.
 	 */
 	bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
-	nbytes =
-	    B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
+	switch (B_TYPE(bk->type)) {
+	case B_BLOB:
+		memcpy(&bl, bk, BBLOB_SIZE);
+		GET_BLOB_SIZE(dbp->env, bl, blob_size, ret);
+		if (ret != 0)
+			return (ret);
+		/*
+		 * It is not possible to add data past UINT32_MAX in the
+		 * partial API, so this is safe.
+		 */
+		if (blob_size > UINT32_MAX)
+			nbytes = UINT32_MAX;
+		else
+			nbytes = (u_int32_t)blob_size;
+		break;
+	case B_OVERFLOW:
+		nbytes = ((BOVERFLOW *)bk)->tlen;
+		break;
+	default:
+		nbytes = bk->len;
+	}
 
-	return (__db_partsize(nbytes, data));
+	*data_size = __db_partsize(nbytes, data);
+
+	return (ret);
 }
 
 /*
@@ -848,6 +972,7 @@ __bam_irep(dbc, h, indx, hdr, data)
 	bi = GET_BINTERNAL(dbp, h, indx);
 	bn = (BINTERNAL *) hdr->data;
 
+	DB_ASSERT(dbc->env, B_TYPE(bi->type) != B_BLOB);
 	if (B_TYPE(bi->type) == B_OVERFLOW &&
 	    (ret = __db_doff(dbc, ((BOVERFLOW *)bi->data)->pgno)) != 0)
 		return (ret);
@@ -892,6 +1017,7 @@ __bam_dup_check(dbc, op, h, indx, sz, cntp)
 
 	/* Count the key once. */
 	bk = GET_BKEYDATA(dbp, h, indx);
+	DB_ASSERT(dbc->env, B_TYPE(bk->type) != B_BLOB);
 	sz += B_TYPE(bk->type) == B_KEYDATA ?
 	    BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
 
@@ -994,6 +1120,7 @@ __bam_dup_convert(dbc, h, indx, cnt)
 		 * overflow, then free up those pages).
 		 */
 		bk = GET_BKEYDATA(dbp, h, dindx + 1);
+		DB_ASSERT(dbc->env, B_TYPE(bk->type) != B_BLOB);
 		hdr.data = bk;
 		hdr.size = B_TYPE(bk->type) == B_KEYDATA ?
 		    BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
diff --git a/src/btree/bt_rec.c b/src/btree/bt_rec.c
index 026564b6..eb44d04b 100644
--- a/src/btree/bt_rec.c
+++ b/src/btree/bt_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/btree/bt_reclaim.c b/src/btree/bt_reclaim.c
index f465cc5a..1203ea35 100644
--- a/src/btree/bt_reclaim.c
+++ b/src/btree/bt_reclaim.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/btree/bt_recno.c b/src/btree/bt_recno.c
index 9356a742..abbd8efb 100644
--- a/src/btree/bt_recno.c
+++ b/src/btree/bt_recno.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -234,7 +234,7 @@ __ramc_del(dbc, flags)
 retry:	if ((ret = __bam_rsearch(dbc, &cp->recno, SR_DELETE, 1, &exact)) != 0)
 		goto err;
 	if (!exact) {
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 	}
 	stack = 1;
@@ -256,7 +256,7 @@ retry:	if ((ret = __bam_rsearch(dbc, &cp->recno, SR_DELETE, 1, &exact)) != 0)
 	 * if the record was "deleted", we could never have found it.
 	 */
 	if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) {
-		ret = DB_KEYEMPTY;
+		ret = DBC_ERR(dbc, DB_KEYEMPTY);
 		goto err;
 	}
 
@@ -391,7 +391,7 @@ retry:	switch (flags) {
 		 * a dup, so we set flags to DB_NEXT and keep going.
 		 */
 		if (!F_ISSET(dbc, DBC_OPD))
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		/* FALLTHROUGH */
 	case DB_NEXT_NODUP:
 		/*
@@ -431,7 +431,7 @@ retry:	switch (flags) {
 		 * is a dup, so we set flags to DB_PREV and keep going.
 		 */
 		if (!F_ISSET(dbc, DBC_OPD))
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		/* FALLTHROUGH */
 	case DB_PREV_NODUP:
 		/*
@@ -443,7 +443,7 @@ retry:	switch (flags) {
 		flags = DB_PREV;
 		if (cp->recno != RECNO_OOB) {
 			if (cp->recno == 1) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 			--cp->recno;
@@ -458,7 +458,7 @@ retry:	switch (flags) {
 		if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0)
 			goto err;
 		if (cp->recno == 0) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		break;
@@ -476,7 +476,7 @@ retry:	switch (flags) {
 			cp->recno++;
 			break;
 		}
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 		/* NOTREACHED */
 	case DB_GET_BOTH:
@@ -522,7 +522,7 @@ retry:	switch (flags) {
 		    1, &exact)) != 0)
 			goto err;
 		if (!exact) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
@@ -561,22 +561,22 @@ retry:	switch (flags) {
 					(void)__bam_stkrel(dbc, STK_CLRDBC);
 					continue;
 				}
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			default:
-				ret = DB_KEYEMPTY;
+				ret = DBC_ERR(dbc, DB_KEYEMPTY);
 				goto err;
 			}
 
 		if (flags == DB_GET_BOTH ||
 		    flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
 			if ((ret = __bam_cmp(dbc, data, cp->page, cp->indx,
-			    __bam_defcmp, &cmp)) != 0)
+			    __bam_defcmp, &cmp, NULL)) != 0)
 				return (ret);
 			if (cmp == 0)
 				break;
 			if (!F_ISSET(dbc, DBC_OPD)) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 			(void)__bam_stkrel(dbc, STK_CLRDBC);
@@ -1331,7 +1331,7 @@ __ram_sread(dbc, top)
 
 	if (0) {
 eof:		t->re_eof = 1;
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 	}
 err:	if (!was_modified)
 		t->re_modified = 0;
@@ -1368,7 +1368,7 @@ retry:	/* Find the slot for insertion. */
 
 	if (exact && flags == DB_NOOVERWRITE && !CD_ISSET(cp) &&
 	    !B_DISSET(GET_BKEYDATA(dbc->dbp, cp->page, cp->indx)->type)) {
-		ret = DB_KEYEXIST;
+		ret = DBC_ERR(dbc, DB_KEYEXIST);
 		goto err;
 	}
 
diff --git a/src/btree/bt_rsearch.c b/src/btree/bt_rsearch.c
index 36d1c667..4ada6e2d 100644
--- a/src/btree/bt_rsearch.c
+++ b/src/btree/bt_rsearch.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -147,7 +147,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
 				    __TLPUT(dbc, lock)) != 0 && ret == 0)
 					ret = t_ret;
 				if (ret == 0)
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto done;
 			}
 		}
@@ -197,7 +197,8 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
 						    lock)) != 0 && ret == 0)
 							ret = t_ret;
 						if (ret == 0)
-							ret = DB_NOTFOUND;
+							ret = DBC_ERR(dbc,
+							    DB_NOTFOUND);
 						goto err;
 					}
 				}
diff --git a/src/btree/bt_search.c b/src/btree/bt_search.c
index e809a852..e3d69d16 100644
--- a/src/btree/bt_search.c
+++ b/src/btree/bt_search.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -51,8 +51,9 @@
 
 /*
  * __bam_get_root --
- *	Fetch the root of a tree and see if we want to keep
- * it in the stack.
+ *	Try to appropriately lock and fetch the root page of a tree;
+ * if successful enter it into the cursor's stack; on error, leave the stack
+ * unchanged.
  *
  * PUBLIC: int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *));
  */
@@ -232,9 +233,11 @@ retry:	if (lock_mode == DB_LOCK_WRITE)
 		} else if (atomic_read(&mpf->mfp->multiversion) != 0 &&
 		    lock_mode == DB_LOCK_WRITE && (ret = __memp_dirty(mpf, &h,
 		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) {
-			(void)__memp_fput(mpf,
-			    dbc->thread_info, h, dbc->priority);
+			if (h != NULL)
+				(void)__memp_fput(mpf,
+					dbc->thread_info, h, dbc->priority);
 			(void)__LPUT(dbc, lock);
+			return (ret);
 		}
 	}
 
@@ -272,9 +275,10 @@ __bam_search(dbc, root_pgno, key, flags, slevel, recnop, exactp)
 	db_recno_t recno;
 	int adjust, cmp, deloffset, ret, set_stack, stack, t_ret;
 	int getlock, was_next;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 	u_int32_t get_mode, wait;
 	u_int8_t level, saved_level;
+	size_t pos, pos_h, pos_l;
 
 	if (F_ISSET(dbc, DBC_OPD))
 		LOCK_CHECK_OFF(dbc->thread_info);
@@ -288,6 +292,7 @@ __bam_search(dbc, root_pgno, key, flags, slevel, recnop, exactp)
 	t = dbp->bt_internal;
 	recno = 0;
 	t_ret = 0;
+	func = NULL;
 
 	BT_STK_CLR(cp);
 	LOCK_INIT(saved_lock);
@@ -339,11 +344,17 @@ retry:	if ((ret = __bam_get_root(dbc, start_pgno, slevel, flags, &stack)) != 0)
 
 	BT_STK_CLR(cp);
 
-	/* Choose a comparison function. */
+	/*
+	 * Choose a comparison function.
+	 * We apply the prefix search optimization only when there
+	 * is no user-specified comparison function set.
+	 */
 	func = F_ISSET(dbc, DBC_OPD) ?
 	    (dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare) :
 	    t->bt_compare;
 
+	pos_h = 0;
+	pos_l = 0;
 	for (;;) {
 		if (TYPE(h) == P_LBTREE)
 			adjust = P_INDX;
@@ -389,9 +400,11 @@ retry:	if ((ret = __bam_get_root(dbc, start_pgno, slevel, flags, &stack)) != 0)
 		 * match on a leaf page, we're done.
 		 */
 		DB_BINARY_SEARCH_FOR(base, lim, NUM_ENT(h), adjust) {
+			/* We compare from the common prefix */
+			pos = pos_l > pos_h ? pos_h : pos_l;
 			DB_BINARY_SEARCH_INCR(indx, base, lim, adjust);
 			if ((ret = __bam_cmp(dbc, key, h, indx,
-			    func, &cmp)) != 0)
+			    func, &cmp, &pos)) != 0)
 				goto err;
 			if (cmp == 0) {
 				if (LEVEL(h) == LEAFLEVEL ||
@@ -403,9 +416,19 @@ retry:	if ((ret = __bam_get_root(dbc, start_pgno, slevel, flags, &stack)) != 0)
 				}
 				goto next;
 			}
-			if (cmp > 0)
+			/*
+			 * We have to maintain the offset in the keys where
+			 * we begin comparing for both ends of the key range
+			 * in which we are binary searching. So, update either
+			 * the high or low position here, depending on how
+			 * the comparison turned out.
+			 */
+			if (cmp > 0) {
 				DB_BINARY_SEARCH_SHIFT_BASE(indx, base,
 				    lim, adjust);
+				pos_l = pos;
+			} else
+				pos_h = pos;
 		}
 
 		/*
@@ -421,7 +444,7 @@ retry:	if ((ret = __bam_get_root(dbc, start_pgno, slevel, flags, &stack)) != 0)
 			*exactp = 0;
 
 			if (LF_ISSET(SR_EXACT)) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 
@@ -444,13 +467,13 @@ get_next:			/*
 				 * at the root if the tree recently collapsed.
 				 */
 				if (PGNO(h) == root_pgno) {
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 					goto err;
 				}
 
 				indx = cp->sp->indx + 1;
 				if (indx == NUM_ENT(cp->sp->page)) {
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 					cp->csp++;
 					goto err;
 				}
@@ -863,7 +886,7 @@ found:	*exactp = 1;
 		 * DB_NOTFOUND.
 		 */
 		if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 8299c69a..f7719dc4 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -63,7 +63,7 @@ __bam_split(dbc, arg, root_pgnop)
 	db_pgno_t *root_pgnop;
 {
 	BTREE_CURSOR *cp;
-	DB_LOCK metalock, next_lock;
+	DB_LOCK meta_lock, next_lock;
 	enum { UP, DOWN } dir;
 	db_pgno_t pgno, next_pgno, root_pgno;
 	int exact, level, ret;
@@ -72,17 +72,16 @@ __bam_split(dbc, arg, root_pgnop)
 		LOCK_CHECK_OFF(dbc->thread_info);
 
 	cp = (BTREE_CURSOR *)dbc->internal;
+	LOCK_INIT(meta_lock);
 	LOCK_INIT(next_lock);
 	next_pgno = PGNO_INVALID;
 
 	/*
-	 * First get a lock on the metadata page, we will have to allocate
+	 * First get a lock on the metadata page; we will have to allocate
 	 * pages and cannot get a lock while we have the search tree pinned.
 	 */
-
 	pgno = PGNO_BASE_MD;
-	if ((ret = __db_lget(dbc,
-	    0, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+	if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &meta_lock)) != 0)
 		goto err;
 	root_pgno = BAM_ROOT_PGNO(dbc);
 
@@ -189,7 +188,7 @@ no_split:		/* Once we've split the leaf page, we're done. */
 	if (root_pgnop != NULL)
 		*root_pgnop = BAM_ROOT_PGNO(dbc);
 err:
-done:	(void)__LPUT(dbc, metalock);
+done:	(void)__LPUT(dbc, meta_lock);
 	(void)__TLPUT(dbc, next_lock);
 
 	if (F_ISSET(dbc, DBC_OPD))
@@ -685,6 +684,7 @@ __bam_broot(dbc, rootp, split, lp, rp)
 			DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
 			DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
 			break;
+		case B_BLOB:
 		case B_DUPLICATE:
 		default:
 			goto pgfmt;
@@ -772,7 +772,30 @@ __ram_root(dbc, rootp, lp, rp)
 
 /*
  * __bam_pinsert --
- *	Insert a new key into a parent page, completing the split.
+ *
+ *	Construct an internal index item and place it in the parent page. It is
+ *	primarily used by __bam_page() to add a new page into the tree. The sole
+ *	other use is by __bam_pupdate() after a reverse split or compact has
+ *	removed pages underneath it, in order to replace the parent's key/nrecs
+ *	to match the new subtree.
+ *
+ * Parameters:
+ *	parent	- the page from the cursor stack to be modified. The next entry
+ *		  in the stack (i.e., the next lower level in the tree) contains
+ *		  the key of the new item. The indx field must have been set
+ *		  when searching down the tree, to point to the new/replaced
+ *		  parent item.
+ *	split	- the indx in the cursor stack of the 'source' of the new item.
+ *	lchild	- the left child page is used *only* when attempting to use
+ *		  prefix key compression on a leaf (data) page.
+ *	rchild	- right child page. The source of the pgno of the new item.
+ *	flags	- BPI_REPLACE | BPI_NORECNUM
+ *		  BPI_NOLOGGING
+ *
+ *	The pgno of the item always comes from rchild, which often is the same
+ *	as parent[1].page. The key for DB_BTREE comes from the next lower page
+ *	in the stack under parent, not from either lchild or rchild parameter --
+ *	though often rchild is a copy of parent[1].page.
  *
  * PUBLIC: int __bam_pinsert
  * PUBLIC:     __P((DBC *, EPG *, u_int32_t, PAGE *, PAGE *, int));
@@ -867,12 +890,27 @@ __bam_pinsert(dbc, parent, split, lchild, rchild, flags)
 			size = BINTERNAL_SIZE(child_bi->len);
 			break;
 		case B_OVERFLOW:
-			/* Reuse the overflow key. */
+			/* Copy the overflow key. */
 			child_bo = (BOVERFLOW *)child_bi->data;
 			memset(&bo, 0, sizeof(bo));
 			bo.type = B_OVERFLOW;
 			bo.tlen = child_bo->tlen;
-			bo.pgno = child_bo->pgno;
+			if (LF_ISSET(BPI_REPLACE)) {
+				/*
+				 * Replace (compact or reverse split) needs to
+				 * copy in case the data item gets removed.
+				 */
+				memset(&hdr, 0, sizeof(hdr));
+				if ((ret = __db_goff(dbc, &hdr,
+				    child_bo->tlen, child_bo->pgno,
+				    &hdr.data, &hdr.size)) == 0)
+					ret = __db_poff(dbc, &hdr, &bo.pgno);
+				if (hdr.data != NULL)
+					__os_free(dbp->env, hdr.data);
+				if (ret != 0)
+					return (ret);
+			} else
+				bo.pgno = child_bo->pgno;
 			bi.len = BOVERFLOW_SIZE;
 			B_TSET(bi.type, B_OVERFLOW);
 			bi.pgno = rchild->pgno;
@@ -881,6 +919,7 @@ __bam_pinsert(dbc, parent, split, lchild, rchild, flags)
 			DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
 			size = BINTERNAL_SIZE(BOVERFLOW_SIZE);
 			break;
+		case B_BLOB:
 		case B_DUPLICATE:
 		default:
 			goto pgfmt;
@@ -982,8 +1021,8 @@ noprefix:		if (P_FREESPACE(dbp, ppage) + oldsize < nbytes)
 			DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
 			DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
 			size = BINTERNAL_SIZE(BOVERFLOW_SIZE);
-
 			break;
+		case B_BLOB:
 		case B_DUPLICATE:
 		default:
 			goto pgfmt;
@@ -1153,23 +1192,32 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
 				nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
 			break;
 		case P_LBTREE:
-			if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
-			    B_KEYDATA)
-				nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
-				    pp, off)->len);
-			else
+			switch (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type)) {
+			case B_KEYDATA:
+				nbytes += BKEYDATA_SIZE(
+				    GET_BKEYDATA(dbp, pp, off)->len);
+				break;
+			case B_BLOB:
+				nbytes += BBLOB_SIZE;
+				break;
+			default:
 				nbytes += BOVERFLOW_SIZE;
-
+			}
 			++off;
 			/* FALLTHROUGH */
 		case P_LDUP:
 		case P_LRECNO:
-			if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
-			    B_KEYDATA)
-				nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
-				    pp, off)->len);
-			else
+			switch (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type)) {
+			case B_KEYDATA:
+				nbytes += BKEYDATA_SIZE(
+				    GET_BKEYDATA(dbp, pp, off)->len);
+				break;
+			case B_BLOB:
+				nbytes += BBLOB_SIZE;
+				break;
+			default:
 				nbytes += BOVERFLOW_SIZE;
+			}
 			break;
 		case P_IRECNO:
 			nbytes += RINTERNAL_SIZE;
@@ -1269,7 +1317,7 @@ __bam_copy(dbp, pp, cp, nxt, stop)
 	PAGE *pp, *cp;
 	u_int32_t nxt, stop;
 {
-	BINTERNAL internal;
+	BINTERNAL *bi, internal;
 	db_indx_t *cinp, nbytes, off, *pinp;
 
 	cinp = P_INP(dbp, cp);
@@ -1302,12 +1350,17 @@ __bam_copy(dbp, pp, cp, nxt, stop)
 			/* FALLTHROUGH */
 		case P_LDUP:
 		case P_LRECNO:
-			if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) ==
-			    B_KEYDATA)
-				nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp,
-				    pp, nxt)->len);
-			else
+			switch (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type)) {
+			case B_KEYDATA:
+				nbytes = BKEYDATA_SIZE(
+				    GET_BKEYDATA(dbp, pp, nxt)->len);
+				break;
+			case B_BLOB:
+				nbytes = BBLOB_SIZE;
+				break;
+			default:
 				nbytes = BOVERFLOW_SIZE;
+			}
 			break;
 		case P_IRECNO:
 			nbytes = RINTERNAL_SIZE;
@@ -1316,17 +1369,18 @@ __bam_copy(dbp, pp, cp, nxt, stop)
 			return (__db_pgfmt(dbp->env, pp->pgno));
 		}
 		cinp[off] = HOFFSET(cp) -= nbytes;
+		/* Minimize the first key on an IBTREE page; it isn't valid. */
+		bi = GET_BINTERNAL(dbp, pp, nxt);
 		if (off == 0 && nxt != 0 && TYPE(pp) == P_IBTREE) {
 			internal.len = 0;
 			UMRW_SET(internal.unused);
 			internal.type = B_KEYDATA;
-			internal.pgno = GET_BINTERNAL(dbp, pp, nxt)->pgno;
-			internal.nrecs = GET_BINTERNAL(dbp, pp, nxt)->nrecs;
+			internal.pgno = bi->pgno;
+			internal.nrecs = bi->nrecs;
 			memcpy(P_ENTRY(dbp, cp, off), &internal, nbytes);
 		}
 		else
-			memcpy(P_ENTRY(dbp, cp, off),
-			     P_ENTRY(dbp, pp, nxt), nbytes);
+			memcpy(P_ENTRY(dbp, cp, off), bi, nbytes);
 	}
 	return (0);
 }
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index 668c4fdb..04c0fbcb 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -278,6 +278,8 @@ __bam_stat_print(dbc, flags)
 		    "%#x\tFixed-length record pad", (u_int)sp->bt_re_pad);
 	}
 	__db_dl(env,
+	    "Number of pages in the database", (u_long)sp->bt_pagecnt);
+	__db_dl(env,
 	    "Underlying database page size", (u_long)sp->bt_pagesize);
 	if (dbp->type == DB_BTREE)
 		__db_dl(env, "Overflow key/data size",
@@ -288,6 +290,10 @@ __bam_stat_print(dbc, flags)
 	    "Number of records in the tree", (u_long)sp->bt_nkeys);
 	__db_dl(env,
 	    "Number of data items in the tree", (u_long)sp->bt_ndata);
+	if (dbp->type == DB_BTREE) {
+		__db_dl(env,
+		    "Number of blobs in the tree", (u_long)sp->bt_nblobs);
+	}
 
 	__db_dl(env,
 	    "Number of tree internal pages", (u_long)sp->bt_int_pg);
@@ -372,6 +378,10 @@ __bam_stat_callback(dbc, h, cookie, putp)
 			/* Ignore off-page duplicates. */
 			if (B_TYPE(type) != B_DUPLICATE)
 				++sp->bt_ndata;
+
+			/* Count blobs. */
+			if (B_TYPE(type) == B_BLOB)
+				++sp->bt_nblobs;
 		}
 
 		++sp->bt_leaf_pg;
diff --git a/src/btree/bt_upgrade.c b/src/btree/bt_upgrade.c
index c9123351..66e27d56 100644
--- a/src/btree/bt_upgrade.c
+++ b/src/btree/bt_upgrade.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_upgrade.h"
 #include "dbinc/btree.h"
@@ -151,3 +152,94 @@ __bam_31_lbtree(dbp, real_name, flags, fhp, h, dirtyp)
 
 	return (ret);
 }
+
+/*
+ * __bam_60_btreemeta --
+ *	Set the btree metadata page's version to 10 and flag the page dirty.
+ *
+ * PUBLIC: int __bam_60_btreemeta
+ * PUBLIC:      __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__bam_60_btreemeta(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	BTMETA33 *bmeta;
+
+	COMPQUIET(flags, 0);
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(dbp, NULL);
+	bmeta = (BTMETA33 *)h;
+
+	bmeta->dbmeta.version = 10;
+	*dirtyp = 1;
+
+	return (0);
+}
+
+/*
+ * __bam_60_lbtree --
+ *	Convert B_BLOB items on a leaf page from BBLOB60 to BBLOB60P1 layout.
+ *
+ * PUBLIC: int __bam_60_lbtree
+ * PUBLIC:      __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__bam_60_lbtree(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	BBLOB60 bl60;
+	BBLOB60P1 bl60p1;
+	BKEYDATA *bk;
+	db_seq_t blob_id, blob_size, file_id, sdb_id;
+	db_indx_t indx;
+	int ret;
+
+	COMPQUIET(flags, 0);
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(fhp, NULL);
+	ret = 0;
+
+	DB_ASSERT(dbp->env, BBLOB60_SIZE == BBLOB_SIZE);
+	for (indx = O_INDX; indx < NUM_ENT(h); indx += P_INDX) {
+		bk = GET_BKEYDATA(dbp, h, indx);
+		if (B_TYPE(bk->type) == B_BLOB ) {
+			memcpy(&bl60, bk, BBLOB60_SIZE);
+			memset(&bl60p1, 0, BBLOB_SIZE);
+			bl60p1.type = bl60.type;
+			bl60p1.len = BBLOB_DSIZE;
+			bl60p1.encoding = bl60.encoding;
+			GET_BLOB60_ID(dbp->env, bl60, blob_id, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_SIZE(dbp->env, bl60, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_FILE_ID(dbp->env, &bl60, file_id, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_SDB_ID(dbp->env, &bl60, sdb_id, ret);
+			if (ret != 0)
+				return (ret);
+			SET_BLOB_ID(&bl60p1, blob_id, BBLOB60P1);
+			SET_BLOB_SIZE(&bl60p1, blob_size, BBLOB60P1);
+			SET_BLOB_FILE_ID(&bl60p1, file_id, BBLOB60P1);
+			SET_BLOB_SDB_ID(&bl60p1, sdb_id, BBLOB60P1);
+			memcpy(bk, &bl60p1, BBLOB_SIZE);
+			*dirtyp = 1;
+		}
+	}
+
+	return (ret);
+}
diff --git a/src/btree/bt_verify.c b/src/btree/bt_verify.c
index 99354a58..8ceb50e6 100644
--- a/src/btree/bt_verify.c
+++ b/src/btree/bt_verify.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_verify.h"
 #include "dbinc/btree.h"
@@ -20,8 +21,8 @@ static int __bam_safe_getdata __P((DB *, DB_THREAD_INFO *,
 static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
     db_indx_t *, u_int32_t));
 static int __bam_vrfy_treeorder __P((DB *, DB_THREAD_INFO *, PAGE *,
-    BINTERNAL *, BINTERNAL *, int (*)(DB *, const DBT *, const DBT *),
-    u_int32_t));
+    BINTERNAL *, BINTERNAL *,
+    int (*)(DB *, const DBT *, const DBT *, size_t *), u_int32_t));
 static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
     db_indx_t *, u_int32_t));
 
@@ -44,6 +45,7 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
 	VRFY_PAGEINFO *pip;
 	int isbad, t_ret, ret;
 	db_indx_t ovflsize;
+	db_seq_t blob_id;
 
 	env = dbp->env;
 	isbad = 0;
@@ -201,6 +203,56 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
 		    "%lu %lu"), (u_long)pgno, (u_long)pip->re_len));
 	}
 
+/*
+ * Where 64-bit integer support is not available,
+ * return an error if the file has any blobs.
+ */
+	t_ret = 0;
+#ifdef HAVE_64BIT_TYPES
+	GET_BLOB_FILE_ID(env, meta, blob_id, t_ret);
+	if (t_ret != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1187",
+		    "Page %lu: blob file id overflow.", "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+	t_ret = 0;
+	GET_BLOB_SDB_ID(env, meta, blob_id, t_ret);
+	if (t_ret != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1188",
+		    "Page %lu: blob subdatabase id overflow.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+#else /* HAVE_64BIT_TYPES */
+	/*
+	 * db_seq_t is an int on systems that do not have 64-bit integers, so
+	 * this will compile and run.
+	 */
+	GET_BLOB_FILE_ID(env, meta, blob_id, t_ret);
+	if (t_ret != 0 || blob_id != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1200",
+		    "Page %lu: blobs require 64 integer compiler support.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+	t_ret = 0;
+	GET_BLOB_SDB_ID(env, meta, blob_id, t_ret);
+	if (t_ret != 0 || blob_id != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1201",
+		    "Page %lu: blobs require 64 integer compiler support.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+#endif
+
 	/*
 	 * We do not check that the rest of the page is 0, because it may
 	 * not be and may still be correct.
@@ -268,8 +320,7 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
 
 	if (F_ISSET(pip, VRFY_HAS_DUPS)) {
 		EPRINT((env, DB_STR_A("1043",
-		    "Page %lu: Recno database has dups",
-		    "%lu"), (u_long)pgno));
+		    "Page %lu: Recno database has dups", "%lu"), (u_long)pgno));
 		ret = DB_VERIFY_BAD;
 		goto err;
 	}
@@ -547,12 +598,15 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
 	db_indx_t *nentriesp;
 	u_int32_t flags;
 {
+	BBLOB bl;
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
 	ENV *env;
 	VRFY_CHILDINFO child;
 	VRFY_ITEM *pagelayout;
 	VRFY_PAGEINFO *pip;
+	off_t blob_size;
+	db_seq_t blob_id, file_id, sdb_id;
 	u_int32_t himark, offset;		/*
 						 * These would be db_indx_ts
 						 * but for alignment.
@@ -563,6 +617,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
 	env = dbp->env;
 	isbad = isdupitem = 0;
 	nentries = 0;
+	file_id = sdb_id = 0;
 	memset(&child, 0, sizeof(VRFY_CHILDINFO));
 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
 		return (ret);
@@ -668,6 +723,9 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
 			else
 				endoff = offset + BKEYDATA_SIZE(bk->len) - 1;
 			break;
+		case B_BLOB:
+			endoff = offset + BBLOB_SIZE - 1;
+			break;
 		case B_DUPLICATE:
 			/*
 			 * Flag that we have dups; we'll check whether
@@ -731,6 +789,52 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
 			 * already been done.
 			 */
 			break;
+		case B_BLOB:
+			if (TYPE(h) == P_IBTREE) {
+				isbad = 1;
+				EPRINT((env, DB_STR_A("1189",
+		"Page %lu: blob item in internal btree page at item %lu",
+				    "%lu %lu"), (u_long)pgno, (u_long)i));
+				break;
+			} else if (TYPE(h) == P_LRECNO) {
+				isbad = 1;
+				EPRINT((env, DB_STR_A("1190",
+		"Page %lu: blob item referenced by recno page at item %lu",
+				    "%lu %lu"), (u_long)pgno, (u_long)i));
+				break;
+			}
+			/*
+			 * Blob item.  Check that the blob file exists and is
+			 * the same file size as is stored in the database
+			 * record.
+			 */
+			memcpy(&bl, bk, BBLOB_SIZE);
+			blob_id = (db_seq_t)bl.id;
+			GET_BLOB_SIZE(env, bl, blob_size, ret);
+			if (ret != 0 || blob_size < 0) {
+				isbad = 1;
+				EPRINT((env, DB_STR_A("1192",
+		"Page %lu: blob file size value has overflowed at item %lu",
+				    "%lu %lu"), (u_long)pgno, (u_long)i));
+				break;
+			}
+			file_id = (db_seq_t)bl.file_id;
+			sdb_id = (db_seq_t)bl.sdb_id;
+			if (file_id == 0 && sdb_id == 0) {
+				isbad = 1;
+				EPRINT((dbp->env, DB_STR_A("1195",
+			"Page %lu: invalid blob dir ids %llu %llu at item %lu",
+				    "%lu %ll %ll %lu"), (u_long)pip->pgno,
+				    (long long)file_id,
+				    (long long)sdb_id, (u_long)i));
+				break;
+			}
+			if ((ret = __blob_vrfy(env, blob_id,
+			    blob_size, file_id, sdb_id, pgno, flags)) != 0) {
+				isbad = 1;
+				break;
+			}
+			break;
 		case B_DUPLICATE:
 			if (TYPE(h) == P_IBTREE) {
 				isbad = 1;
@@ -751,9 +855,17 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
 			    (BOVERFLOW *)(((BINTERNAL *)bk)->data) :
 			    (BOVERFLOW *)bk;
 
-			if (B_TYPE(bk->type) == B_OVERFLOW)
+			if (B_TYPE(bk->type) == B_OVERFLOW) {
+				if (TYPE(h) == P_IBTREE &&
+				    bk->len != BOVERFLOW_SIZE) {
+					EPRINT((env, DB_STR_A("1196",
+			    "Page %lu: bad length %u in B_OVERFLOW item %lu",
+					    "%lu %u %lu"),
+					    (u_long)pgno, bk->len, (u_long)i));
+					isbad = 1;
+				}
 				/* Make sure tlen is reasonable. */
-				if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
+				if (bo->tlen >= dbp->pgsize * vdp->last_pgno) {
 					isbad = 1;
 					EPRINT((env, DB_STR_A("1056",
 				"Page %lu: impossible tlen %lu, item %lu",
@@ -762,6 +874,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
 					/* Don't save as a child. */
 					break;
 				}
+			}
 
 			if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
 			    bo->pgno == PGNO_INVALID) {
@@ -918,8 +1031,8 @@ __bam_vrfy_itemorder(dbp, vdp, ip, h, pgno, nentries, ovflok, hasdups, flags)
 	VRFY_PAGEINFO *pip;
 	db_indx_t i, *inp;
 	int adj, cmp, freedup_1, freedup_2, isbad, ret, t_ret;
-	int (*dupfunc) __P((DB *, const DBT *, const DBT *));
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*dupfunc) __P((DB *, const DBT *, const DBT *, size_t *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 	void *buf1, *buf2, *tmpbuf;
 
 	/*
@@ -1066,6 +1179,11 @@ retry:	p1 = &dbta;
 			if (B_TYPE(bk->type) == B_OVERFLOW) {
 				bo = (BOVERFLOW *)bk;
 				goto overflow;
+			} else if (B_TYPE(bk->type) == B_BLOB) {
+				isbad = 1;
+				EPRINT((env, DB_STR_A("1197",
+				    "Page %lu: Blob found in key item %lu",
+				    "%lu %lu"), (u_long)pgno, (u_long)i));
 			} else {
 				p2->data = bk->data;
 				p2->size = bk->len;
@@ -1124,7 +1242,8 @@ overflow:		if (!ovflok) {
 
 		/* Compare with the last key. */
 		if (p1->data != NULL && p2->data != NULL) {
-			cmp = inp[i] == inp[i - adj] ? 0 : func(dbp, p1, p2);
+			cmp = inp[i] == inp[i - adj] ? 0 :
+			    func(dbp, p1, p2, NULL);
 
 			/* comparison succeeded */
 			if (cmp > 0) {
@@ -1236,8 +1355,8 @@ overflow:		if (!ovflok) {
 					 * until we do the structure check
 					 * and see whether DUPSORT is set.
 					 */
-					if (dupfunc(dbp, &dup_1, &dup_2) > 0 &&
-					    pip != NULL)
+					if (dupfunc(dbp, &dup_1, &dup_2,
+					    NULL) > 0 && pip != NULL)
 						F_SET(pip, VRFY_DUPS_UNSORTED);
 
 					if (freedup_1)
@@ -1409,7 +1528,7 @@ __bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
 	db_recno_t child_nrecs, nrecs;
 	u_int32_t child_level, child_relen, j, level, relen, stflags;
 	u_int8_t leaf_type;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 	int isbad, p, ret, t_ret, toplevel;
 
 	if (levelp != NULL)	/* Don't leave uninitialized on error. */
@@ -1524,7 +1643,7 @@ __bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
 			 * Don't do the prev/next_pgno checks if we've lost
 			 * leaf pages due to another corruption.
 			 */
-			if (!F_ISSET(vdp, VRFY_LEAFCHAIN_BROKEN)) {
+			if (!F_ISSET(vdp, SALVAGE_LEAFCHAIN_BROKEN)) {
 				if (pip->pgno != vdp->next_pgno) {
 					isbad = 1;
 					EPRINT((env, DB_STR_A("1075",
@@ -1547,7 +1666,7 @@ bad_prev:				isbad = 1;
 		}
 		vdp->prev_pgno = pip->pgno;
 		vdp->next_pgno = pip->next_pgno;
-		F_CLR(vdp, VRFY_LEAFCHAIN_BROKEN);
+		F_CLR(vdp, SALVAGE_LEAFCHAIN_BROKEN);
 
 		/*
 		 * Overflow pages are common to all three leaf types;
@@ -1694,7 +1813,7 @@ bad_prev:				isbad = 1;
 		 * spew error messages about erroneous prev/next_pgnos,
 		 * since that's probably not the real problem.
 		 */
-		F_SET(vdp, VRFY_LEAFCHAIN_BROKEN);
+		F_SET(vdp, SALVAGE_LEAFCHAIN_BROKEN);
 
 		ret = DB_VERIFY_BAD;
 		goto err;
@@ -2042,7 +2161,7 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 	DB_THREAD_INFO *ip;
 	PAGE *h;
 	BINTERNAL *lp, *rp;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 	u_int32_t flags;
 {
 	BOVERFLOW *bo;
@@ -2050,7 +2169,7 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 	DBT dbt;
 	ENV *env;
 	db_indx_t last;
-	int ret, cmp;
+	int cmp, ret, t_ret;
 
 	env = dbp->env;
 	memset(&dbt, 0, sizeof(DBT));
@@ -2077,7 +2196,6 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 		return (__db_unknown_path(env, "__bam_vrfy_treeorder"));
 	}
 
-	/* Populate a dummy cursor. */
 	if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
 	    PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
 		return (ret);
@@ -2095,9 +2213,6 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 	 * parent and falsely report a failure.)
 	 */
 	if (lp != NULL && TYPE(h) != P_IBTREE) {
-		if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
-		    PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
-			return (ret);
 		if (lp->type == B_KEYDATA) {
 			dbt.data = lp->data;
 			dbt.size = lp->len;
@@ -2105,13 +2220,13 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 			bo = (BOVERFLOW *)lp->data;
 			if ((ret = __db_goff(dbc, &dbt,
 			    bo->tlen, bo->pgno, NULL, NULL)) != 0)
-				return (ret);
-		} else
-			return (
-			    __db_unknown_path(env, "__bam_vrfy_treeorder"));
+				goto err;
+		} else {
+			ret = __db_unknown_path(env, "__bam_vrfy_treeorder");
+			goto err;
+		}
 
-		/* On error, fall through, free if needed, and return. */
-		if ((ret = __bam_cmp(dbc, &dbt, h, 0, func, &cmp)) == 0) {
+		if ((ret = __bam_cmp(dbc, &dbt, h, 0, func, &cmp, NULL)) == 0) {
 			if (cmp > 0) {
 				EPRINT((env, DB_STR_A("1092",
 	    "Page %lu: first item on page sorted greater than parent entry",
@@ -2126,7 +2241,7 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 		if (dbt.data != lp->data)
 			__os_ufree(env, dbt.data);
 		if (ret != 0)
-			return (ret);
+			goto err;
 	}
 
 	if (rp != NULL) {
@@ -2137,13 +2252,14 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 			bo = (BOVERFLOW *)rp->data;
 			if ((ret = __db_goff(dbc, &dbt,
 			    bo->tlen, bo->pgno, NULL, NULL)) != 0)
-				return (ret);
-		} else
-			return (
-			    __db_unknown_path(env, "__bam_vrfy_treeorder"));
+				goto err;
+		} else {
+			ret = __db_unknown_path(env, "__bam_vrfy_treeorder");
+			goto err;
+		}
 
-		/* On error, fall through, free if needed, and return. */
-		if ((ret = __bam_cmp(dbc, &dbt, h, last, func, &cmp)) == 0) {
+		if ((ret = __bam_cmp(dbc,
+		    &dbt, h, last, func, &cmp, NULL)) == 0) {
 			if (cmp < 0) {
 				EPRINT((env, DB_STR_A("1094",
 	    "Page %lu: last item on page sorted greater than parent entry",
@@ -2158,6 +2274,9 @@ __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
 		if (dbt.data != rp->data)
 			__os_ufree(env, dbt.data);
 	}
+err:
+	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
 
 	return (ret);
 }
@@ -2186,14 +2305,20 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
 {
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
+	BBLOB bl;
 	DBT dbt, repldbt, unknown_key, unknown_data;
 	ENV *env;
 	VRFY_ITEM *pgmap;
 	db_indx_t i, last, beg, end, *inp;
 	db_pgno_t ovflpg;
+	off_t blob_size, blob_offset, remaining;
+	u_int32_t blob_buf_size;
+	u_int8_t *blob_buf;
 	u_int32_t himark, ovfl_bufsz;
+	db_seq_t blob_id, file_id, sdb_id;
 	void *ovflbuf;
 	int adj, ret, t_ret, t2_ret;
+	char *prefix;
 #ifdef HAVE_COMPRESSION
 	DBT kcpy, *last_key;
 	int unknown_dup_key;
@@ -2202,6 +2327,8 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
 	env = dbp->env;
 	ovflbuf = pgmap = NULL;
 	inp = P_INP(dbp, h);
+	blob_buf_size = 0;
+	blob_buf = NULL;
 
 	memset(&dbt, 0, sizeof(DBT));
 	dbt.flags = DB_DBT_REALLOC;
@@ -2543,6 +2670,68 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
 			}
 #endif
 			break;
+		case B_BLOB:
+			memcpy(&bl, bk, BBLOB_SIZE);
+			blob_id = (db_seq_t)bl.id;
+			GET_BLOB_SIZE(env, bl, blob_size, ret);
+			if (ret != 0 || blob_size < 0)
+				goto err;
+			file_id = (db_seq_t)bl.file_id;
+			sdb_id = (db_seq_t)bl.sdb_id;
+
+			/* Read the blob, in pieces if it is too large.*/
+			blob_offset = 0;
+			if (blob_size > MEGABYTE) {
+				if (blob_buf_size < MEGABYTE) {
+					if ((ret = __os_realloc(
+					    env,  MEGABYTE, &blob_buf)) != 0)
+						goto err;
+					blob_buf_size = MEGABYTE;
+				}
+			} else if (blob_buf_size < blob_size) {
+				blob_buf_size = (u_int32_t)blob_size;
+				if ((ret = __os_realloc(env,
+				    blob_buf_size, &blob_buf)) != 0)
+					goto err;
+			}
+			dbt.data = blob_buf;
+			dbt.ulen = blob_buf_size;
+			remaining = blob_size;
+			prefix = " ";
+			do {
+				if ((ret = __blob_salvage(env, blob_id,
+				    blob_offset,
+				    ((remaining < blob_buf_size) ?
+				    (size_t)remaining : blob_buf_size),
+				    file_id, sdb_id, &dbt)) != 0) {
+					if (LF_ISSET(DB_AGGRESSIVE)) {
+						ret = DB_VERIFY_BAD;
+						break;
+					}
+					F_CLR(vdp, SALVAGE_STREAM_BLOB);
+					goto err;
+				}
+				if (remaining > blob_buf_size)
+					F_SET(vdp, SALVAGE_STREAM_BLOB);
+				else
+					F_CLR(vdp, SALVAGE_STREAM_BLOB);
+				if ((t_ret = __db_vrfy_prdbt(
+				    &dbt, 0, prefix,
+				    handle, callback, 0, 0, vdp)) != 0) {
+					if (ret == 0)
+						ret = t_ret;
+					F_CLR(vdp, SALVAGE_STREAM_BLOB);
+					goto err;
+				}
+				prefix = NULL;
+				blob_offset += dbt.size;
+				if (remaining < blob_buf_size)
+					remaining = 0;
+				else
+					remaining -= blob_buf_size;
+			} while (remaining > 0);
+			F_CLR(vdp, SALVAGE_STREAM_BLOB);
+			break;
 		default:
 			/*
 			 * We should never get here; __db_vrfy_inpitem should
@@ -2572,6 +2761,8 @@ err:	if (pgmap != NULL)
 		__os_free(env, ovflbuf);
 	if (repldbt.data != NULL)
 		__os_free(env, repldbt.data);
+	if (blob_buf != NULL)
+		__os_free(env, blob_buf);
 #ifdef HAVE_COMPRESSION
 	if (kcpy.data != NULL)
 		__os_free(env, kcpy.data);
diff --git a/src/btree/btree.src b/src/btree/btree.src
index 08e5a206..02088b88 100644
--- a/src/btree/btree.src
+++ b/src/btree/btree.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/bsearch.c b/src/clib/bsearch.c
index 3e55009a..de15358b 100644
--- a/src/clib/bsearch.c
+++ b/src/clib/bsearch.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/getcwd.c b/src/clib/getcwd.c
index 83e8b62d..028fc3f2 100644
--- a/src/clib/getcwd.c
+++ b/src/clib/getcwd.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1989, 1991, 1993
diff --git a/src/clib/getopt.c b/src/clib/getopt.c
index ca98e7f1..4e4dc6c8 100644
--- a/src/clib/getopt.c
+++ b/src/clib/getopt.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1987, 1993, 1994
diff --git a/src/clib/isalpha.c b/src/clib/isalpha.c
index 6bf1ffb7..39114c08 100644
--- a/src/clib/isalpha.c
+++ b/src/clib/isalpha.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/isdigit.c b/src/clib/isdigit.c
index d1b2a65e..e4e1d3d8 100644
--- a/src/clib/isdigit.c
+++ b/src/clib/isdigit.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/isprint.c b/src/clib/isprint.c
index 685e20ea..310894d5 100644
--- a/src/clib/isprint.c
+++ b/src/clib/isprint.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/isspace.c b/src/clib/isspace.c
index df450d3b..48a20617 100644
--- a/src/clib/isspace.c
+++ b/src/clib/isspace.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/memcmp.c b/src/clib/memcmp.c
index 7fec827c..7db1d3ad 100644
--- a/src/clib/memcmp.c
+++ b/src/clib/memcmp.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993
diff --git a/src/clib/memmove.c b/src/clib/memmove.c
index 34a181cc..866843dc 100644
--- a/src/clib/memmove.c
+++ b/src/clib/memmove.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993
diff --git a/src/clib/printf.c b/src/clib/printf.c
index a2c01296..f36eeb15 100644
--- a/src/clib/printf.c
+++ b/src/clib/printf.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/raise.c b/src/clib/raise.c
index ad0e567f..223f797f 100644
--- a/src/clib/raise.c
+++ b/src/clib/raise.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/rand.c b/src/clib/rand.c
index 6b810060..426627a9 100644
--- a/src/clib/rand.c
+++ b/src/clib/rand.c
@@ -13,6 +13,7 @@
  * PUBLIC: void srand __P((unsigned int));
  * PUBLIC: #endif
  */
+#ifndef HAVE_RAND
 int rand(void)	/* RAND_MAX assumed to be 32767 */
 {
 	DB_GLOBAL(rand_next) = DB_GLOBAL(rand_next) * 1103515245 + 12345;
@@ -23,3 +24,4 @@ void srand(unsigned int seed)
 {
 	DB_GLOBAL(rand_next) = seed;
 }
+#endif
diff --git a/src/clib/snprintf.c b/src/clib/snprintf.c
index 6b31d850..8f1a6855 100644
--- a/src/clib/snprintf.c
+++ b/src/clib/snprintf.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/clib/strerror.c b/src/clib/strerror.c
index 62bd7dd5..b2d148e4 100644
--- a/src/clib/strerror.c
+++ b/src/clib/strerror.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1988, 1993
diff --git a/src/clib/time.c b/src/clib/time.c
index abc2ab2d..3a3f0c3e 100644
--- a/src/clib/time.c
+++ b/src/clib/time.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/clock.c b/src/common/clock.c
index e1f917af..21a17de6 100644
--- a/src/common/clock.c
+++ b/src/common/clock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/crypto_stub.c b/src/common/crypto_stub.c
index 95faebdb..b961a620 100644
--- a/src/common/crypto_stub.c
+++ b/src/common/crypto_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/db_byteorder.c b/src/common/db_byteorder.c
index 71428f0a..13bc2d52 100644
--- a/src/common/db_byteorder.c
+++ b/src/common/db_byteorder.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/db_compint.c b/src/common/db_compint.c
index 9f5ccf9a..10317b2f 100644
--- a/src/common/db_compint.c
+++ b/src/common/db_compint.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #include "db_config.h"
diff --git a/src/common/db_err.c b/src/common/db_err.c
index 6edc37b6..7acaa174 100644
--- a/src/common/db_err.c
+++ b/src/common/db_err.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -18,6 +18,11 @@
 static void __db_msgcall __P((const DB_ENV *, const char *, va_list));
 static void __db_msgfile __P((const DB_ENV *, const char *, va_list));
 
+#if defined(HAVE_ERROR_HISTORY)
+static void __db_thread_once_func __P((void));
+static void __db_deferred_free __P((void *));
+#endif
+
 /*
  * __db_fchk --
  *	General flags checking routine.
@@ -62,6 +67,9 @@ __db_ferr(env, name, iscombo)
 	const char *name;
 	int iscombo;
 {
+	int ret;
+
+	ret = USR_ERR(env, EINVAL);
 	if (iscombo)
 		__db_errx(env, DB_STR_A("0054",
 		    "illegal flag combination specified to %s", "%s"), name);
@@ -69,7 +77,7 @@ __db_ferr(env, name, iscombo)
 		__db_errx(env, DB_STR_A("0055",
 		    "illegal flag specified to %s", "%s"), name);
 
-	return (EINVAL);
+	return (ret);
 }
 
 /*
@@ -145,9 +153,24 @@ __db_assert(env, e, file, line)
 	if (DB_GLOBAL(j_assert) != NULL)
 		DB_GLOBAL(j_assert)(e, file, line);
 	else {
-		__db_errx(env, DB_STR_A("0059",
-		    "assert failure: %s/%d: \"%s\"",
-		    "%s %d %s"), file, line, e);
+		/*
+		 * If a panic has preceded this assertion failure, print that
+		 * message as well -- it might be relevant.
+		 */
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (PANIC_ISSET(env)) {
+			REGENV *renv;
+			renv = (env == NULL || env->reginfo == NULL) ?
+				NULL : env->reginfo->primary;
+			__db_errx(env, DB_STR_A("0242",
+			    "assert failure (%s/%d: %s) after panic %s",
+			    "%s %d %s %s"), file, line, e,
+			    renv == NULL ? "" : renv->failure_symptom);
+		} else
+#endif
+			__db_errx(env, DB_STR_A("0059",
+			    "assert failure: %s/%d: \"%s\"",
+			    "%s %d %s"), file, line, e);
 
 		__os_abort(env);
 		/* NOTREACHED */
@@ -156,8 +179,49 @@ __db_assert(env, e, file, line)
 #endif
 
 /*
+ * __env_panic_event --
+ *	Notify the application of a db_register, failchk, or generic panic.
+ *
+ * PUBLIC: void __env_panic_event __P((ENV *, int));
+ */
+void
+__env_panic_event(env, errval)
+	ENV *env;
+	int errval;
+{
+	DB_ENV *dbenv;
+	REGENV *renv;
+	u_int32_t event;
+	void *info;
+	DB_EVENT_FAILCHK_INFO failinfo;
+
+	dbenv = env->dbenv;
+	info = &errval;		/* Default event info: the error value. */
+	if (dbenv->db_paniccall != NULL)	/* Deprecated */
+		dbenv->db_paniccall(dbenv, errval);
+	/*
+	 * We check for DB_EVENT_FAILCHK_PANIC and DB_EVENT_REG_PANIC first,
+	 * because they are not set by themselves. If one of those is set,
+	 * this panic is a somewhat expected consequence of a previous failure.
+	 */
+	renv = (env->reginfo == NULL) ? NULL : env->reginfo->primary;
+	if (renv != NULL && renv->failure_panic) {
+		event = DB_EVENT_FAILCHK_PANIC;
+		failinfo.error = errval;
+		(void)strncpy(failinfo.symptom,
+		    renv->failure_symptom, sizeof(failinfo.symptom));
+		failinfo.symptom[sizeof(failinfo.symptom) - 1] = '\0';
+		info = &failinfo;	/* Pass the error and its symptom. */
+	} else if (renv != NULL && renv->reg_panic)
+		event = DB_EVENT_REG_PANIC;
+	else
+		event = DB_EVENT_PANIC;
+	DB_EVENT(env, event, info);
+}
+
+/*
  * __env_panic_msg --
- *	Just report that someone else paniced.
+ *	Report that we noticed a panic which had been set somewhere else.
  *
  * PUBLIC: int __env_panic_msg __P((ENV *));
  */
@@ -165,28 +229,16 @@ int
 __env_panic_msg(env)
 	ENV *env;
 {
-	DB_ENV *dbenv;
 	int ret;
 
-	dbenv = env->dbenv;
-
 	ret = DB_RUNRECOVERY;
+	/* Make a note saying where this panic was detected. */
+	(void)USR_ERR(env, ret);
 
 	__db_errx(env, DB_STR("0060",
 	    "PANIC: fatal region error detected; run recovery"));
 
-	if (dbenv->db_paniccall != NULL)		/* Deprecated */
-		dbenv->db_paniccall(dbenv, ret);
-
-	/* Must check for DB_EVENT_REG_PANIC panic first because it is never
-	 * set by itself.  If set, it means panic came from DB_REGISTER code
-	 * only, otherwise it could be from many possible places in the code.
-	 */
-	if ((env->reginfo != NULL) &&
-	    (((REGENV *)env->reginfo->primary)->reg_panic))
-		DB_EVENT(env, DB_EVENT_REG_PANIC, &ret);
-	else
-		DB_EVENT(env, DB_EVENT_PANIC, &ret);
+	__env_panic_event(env, ret);
 
 	return (ret);
 }
@@ -202,28 +254,13 @@ __env_panic(env, errval)
 	ENV *env;
 	int errval;
 {
-	DB_ENV *dbenv;
-
-	dbenv = env->dbenv;
-
 	if (env != NULL) {
 		__env_panic_set(env, 1);
 
-		__db_err(env, errval, DB_STR("0061", "PANIC"));
+		if (errval != DB_RUNRECOVERY)
+			__db_err(env, errval, DB_STR("0061", "PANIC"));
 
-		if (dbenv->db_paniccall != NULL)	/* Deprecated */
-			dbenv->db_paniccall(dbenv, errval);
-
-		/* Must check for DB_EVENT_REG_PANIC first because it is never
-		 * set by itself.  If set, it means panic came from DB_REGISTER
-		 * code only, otherwise it could be from many possible places
-		 * in the code.
-		 */
-		if ((env->reginfo != NULL) &&
-		    (((REGENV *)env->reginfo->primary)->reg_panic))
-			DB_EVENT(env, DB_EVENT_REG_PANIC, &errval);
-		else
-			DB_EVENT(env, DB_EVENT_PANIC, &errval);
+		__env_panic_event(env, errval);
 	}
 
 #if defined(DIAGNOSTIC) && !defined(CONFIG_TEST)
@@ -302,6 +339,9 @@ db_strerror(error)
 	case DB_LOG_VERIFY_BAD:
 		return (DB_STR("0071",
 		    "DB_LOG_VERIFY_BAD: Log verification failed"));
+	case DB_META_CHKSUM_FAIL:
+		return (DB_STR("0247",
+	    "DB_META_CHKSUM_FAIL: Checksum mismatch detected on a database metadata page"));
 	case DB_NOSERVER:
 		return (DB_STR("0072",
     "DB_NOSERVER: No message dispatch call-back function has been configured"));
@@ -419,18 +459,21 @@ __db_syserr(env, error, fmt, va_alist)
 	DB_ENV *dbenv;
 
 	dbenv = env == NULL ? NULL : env->dbenv;
+	if (env != NULL)
+		(void)USR_ERR(env, error);
 
 	/*
 	 * The same as DB->err, except we don't default to writing to stderr
 	 * after any output channel has been configured, and we use a system-
 	 * specific function to translate errors to strings.
 	 */
-	DB_REAL_ERR(dbenv, error, DB_ERROR_SYSTEM, 0, fmt);
+	DB_REAL_ERR(dbenv,
+	    error, error == 0 ? DB_ERROR_NOT_SET : DB_ERROR_SYSTEM, 0, fmt);
 }
 
 /*
  * __db_err --
- *	Standard error routine.
+ *	Standard error routine with an error code.
  *
  * PUBLIC: void __db_err __P((const ENV *, int, const char *, ...))
  * PUBLIC:    __attribute__ ((__format__ (__printf__, 3, 4)));
@@ -450,6 +493,10 @@ __db_err(env, error, fmt, va_alist)
 
 	dbenv = env == NULL ? NULL : env->dbenv;
 
+	/* (If no deferred messages yet, at least?) add this call's info.
+	(void)USR_ERR(env, error);
+	*/
+
 	/*
 	 * The same as DB->err, except we don't default to writing to stderr
 	 * once an output channel has been configured.
@@ -459,7 +506,7 @@ __db_err(env, error, fmt, va_alist)
 
 /*
  * __db_errx --
- *	Standard error routine.
+ *	Standard error routine without any error code.
  *
  * PUBLIC: void __db_errx __P((const ENV *, const char *, ...))
  * PUBLIC:    __attribute__ ((__format__ (__printf__, 2, 3)));
@@ -500,25 +547,54 @@ __db_errcall(dbenv, error, error_set, fmt, ap)
 	const char *fmt;
 	va_list ap;
 {
-	char *p;
-	char buf[2048];		/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
-	char sysbuf[1024];	/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
+	char *end, *p;
+	char buf[2048 + DB_ERROR_HISTORY_SIZE];
+	char sysbuf[1024];
+#ifdef HAVE_ERROR_HISTORY
+	DB_MSGBUF *deferred_mb;
+	ptrdiff_t len;
+#endif
 
 	p = buf;
+	/* Reserve 1 byte at the end for '\0'. */
+	end = buf + sizeof(buf) - 1;
 	if (fmt != NULL)
 		p += vsnprintf(buf, sizeof(buf), fmt, ap);
+
 	if (error_set != DB_ERROR_NOT_SET)
-		p += snprintf(p,
-		    sizeof(buf) - (size_t)(p - buf), ": %s",
+		p += snprintf(p, (size_t)(end - p), ": %s",
 		    error_set == DB_ERROR_SET ? db_strerror(error) :
 		    __os_strerror(error, sysbuf, sizeof(sysbuf)));
 
+#ifdef HAVE_ERROR_HISTORY
+	/*
+	 * Append any messages (e.g., diagnostics) stashed away in the deferred
+	 * msgbuf. Strncpy() can't be trusted to append '\0', do it "manually".
+	 */
+	if ((deferred_mb = __db_deferred_get()) != NULL &&
+	    (len = deferred_mb->cur - deferred_mb->buf) != 0) {
+		p += snprintf(p,
+		    (size_t)(end - p), "\nErrors during this API call:");
+		if (len > (end - p))
+			len = end - p;
+		if (len != 0) {
+			memmove(p, deferred_mb->buf, (size_t)len);
+			p[len] = '\0';
+		}
+	}
+#endif
+
 	dbenv->db_errcall(dbenv, dbenv->db_errpfx, buf);
 }
 
 /*
  * __db_errfile --
- *	Do the error message work for FILE *s.
+ *	Do the error message work for FILE *s. Combine the messages into a
+ *	single fprintf() call, to avoid interspersed output when there are
+ *	multiple active threads.
+ *
+ *	Display a ": " after the dbenv prefix, if it has one.
+ *	Display a ": " before the error message string, if the error was set.
  *
  * PUBLIC: void __db_errfile
  * PUBLIC:    __P((const DB_ENV *, int, db_error_set_t, const char *, va_list));
@@ -532,29 +608,62 @@ __db_errfile(dbenv, error, error_set, fmt, ap)
 	va_list ap;
 {
 	FILE *fp;
-	int need_sep;
-	char sysbuf[1024];	/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
+	char *defintro, *defmsgs, *error_str, *prefix, *sep1, *sep2;
+	char sysbuf[200];
+	char prefix_buf[200];
+	char full_fmt[4096];
+#ifdef HAVE_ERROR_HISTORY
+	DB_MSGBUF *deferred_mb;
+	size_t room;
+#endif
 
+	prefix = sep1 = sep2 = error_str = "";
 	fp = dbenv == NULL ||
 	    dbenv->db_errfile == NULL ? stderr : dbenv->db_errfile;
-	need_sep = 0;
+	if (fmt == NULL)
+		fmt = "";
 
 	if (dbenv != NULL && dbenv->db_errpfx != NULL) {
-		(void)fprintf(fp, "%s", dbenv->db_errpfx);
-		need_sep = 1;
+		prefix = __db_fmt_quote(prefix_buf,
+		    sizeof(prefix_buf), dbenv->db_errpfx);
+		sep1 = ": ";
 	}
-	if (fmt != NULL && fmt[0] != '\0') {
-		if (need_sep)
-			(void)fprintf(fp, ": ");
-		need_sep = 1;
-		(void)vfprintf(fp, fmt, ap);
+	switch (error_set) {
+	case DB_ERROR_NOT_SET:
+		break;
+	case DB_ERROR_SET:
+		error_str = db_strerror(error);
+		sep2 = ": ";
+		break;
+	case DB_ERROR_SYSTEM:
+		error_str = __os_strerror(error, sysbuf, sizeof(sysbuf));
+		sep2 = ": ";
+		break;
 	}
-	if (error_set != DB_ERROR_NOT_SET)
-		(void)fprintf(fp, "%s%s",
-		    need_sep ? ": " : "",
-		    error_set == DB_ERROR_SET ? db_strerror(error) :
-		    __os_strerror(error, sysbuf, sizeof(sysbuf)));
-	(void)fprintf(fp, "\n");
+#ifdef HAVE_ERROR_HISTORY
+	if ((deferred_mb = __db_deferred_get()) != NULL &&
+	    deferred_mb->cur != deferred_mb->buf) {
+		defmsgs =
+		    __db_fmt_quote(deferred_mb->buf, deferred_mb->len, NULL);
+		defintro = "\nErrors during this API call:";
+		/*
+		 * If there are more deferred messages than will be displayed
+		 * change the introductory message to warn of the truncation.
+		 */
+		room = sizeof(full_fmt) - (strlen(sep1) +
+		    strlen(fmt) + strlen(sep2) + strlen(error_str));
+		if (deferred_mb->len + strlen(defintro) > room) {
+			defintro =
+			    "\nFirst recorded errors during this API call:";
+			memmove(defmsgs + room - 4, "...\n", 4);
+		}
+
+	} else
+#endif
+		defmsgs = defintro = "";
+	(void)snprintf(full_fmt, sizeof(full_fmt), "%s%s%s%s%s%s%s\n", prefix,
+	    sep1, fmt, sep2, error_str, defintro, defmsgs);
+	(void)vfprintf(fp, full_fmt, ap);
 	(void)fflush(fp);
 }
 
@@ -562,15 +671,15 @@ __db_errfile(dbenv, error, error_set, fmt, ap)
  * __db_msgadd --
  *	Aggregate a set of strings into a buffer for the callback API.
  *
- * PUBLIC: void __db_msgadd __P((ENV *, DB_MSGBUF *, const char *, ...))
+ * PUBLIC: void __db_msgadd __P((const ENV *, DB_MSGBUF *, const char *, ...))
  * PUBLIC:    __attribute__ ((__format__ (__printf__, 3, 4)));
  */
 void
 #ifdef STDC_HEADERS
-__db_msgadd(ENV *env, DB_MSGBUF *mbp, const char *fmt, ...)
+__db_msgadd(const ENV *env, DB_MSGBUF *mbp, const char *fmt, ...)
 #else
 __db_msgadd(env, mbp, fmt, va_alist)
-	ENV *env;
+	const ENV *env;
 	DB_MSGBUF *mbp;
 	const char *fmt;
 	va_dcl
@@ -592,17 +701,17 @@ __db_msgadd(env, mbp, fmt, va_alist)
  *	Aggregate a set of strings into a buffer for the callback API.
  *
  * PUBLIC: void __db_msgadd_ap
- * PUBLIC:     __P((ENV *, DB_MSGBUF *, const char *, va_list));
+ * PUBLIC:     __P((const ENV *, DB_MSGBUF *, const char *, va_list));
  */
 void
 __db_msgadd_ap(env, mbp, fmt, ap)
-	ENV *env;
+	const ENV *env;
 	DB_MSGBUF *mbp;
 	const char *fmt;
 	va_list ap;
 {
-	size_t len, olen;
-	char buf[2048];		/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
+	size_t len, nlen, olen;
+	char buf[2048];
 
 	len = (size_t)vsnprintf(buf, sizeof(buf), fmt, ap);
 
@@ -613,9 +722,16 @@ __db_msgadd_ap(env, mbp, fmt, ap)
 	 */
 	olen = (size_t)(mbp->cur - mbp->buf);
 	if (olen + len >= mbp->len) {
-		if (__os_realloc(env, mbp->len + len + 256, &mbp->buf))
+		/* Don't write too much for preallocated DB_MSGBUFs. */
+		if (F_ISSET(mbp, DB_MSGBUF_PREALLOCATED)) {
+			memset(mbp->cur, '*', mbp->len - olen);
+			mbp->cur = mbp->buf + mbp->len;
 			return;
-		mbp->len += (len + 256);
+		}
+		nlen = mbp->len + len + (env == NULL ? 8192 : 256);
+		if (__os_realloc(env, nlen, &mbp->buf))
+			return;
+		mbp->len = nlen;
 		mbp->cur = mbp->buf + olen;
 	}
 
@@ -648,6 +764,42 @@ __db_msg(env, fmt, va_alist)
 }
 
 /*
+ * __db_debug_msg --
+ *	Save a message to be displayed only if this API call returns an error.
+ *	The message is discarded if this API call succeeds.
+ *
+ * PUBLIC: void __db_debug_msg __P((const ENV *, const char *, ...));
+ */
+void
+#ifdef STDC_HEADERS
+__db_debug_msg(const ENV *env, const char *fmt, ...)
+#else
+__db_debug_msg(env, fmt, va_alist)
+	const ENV *env;
+	const char *fmt;
+	va_dcl
+#endif
+{
+#ifdef HAVE_ERROR_HISTORY	/* Else only the COMPQUIET()s remain. */
+	DB_MSGBUF *mb;
+	va_list ap;
+
+	if (env == NULL || (mb = __db_deferred_get()) == NULL)
+		return;		/* No env, or no per-thread buffer. */
+
+#ifdef STDC_HEADERS
+	va_start(ap, fmt);
+#else
+	va_start(ap);
+#endif	/* STDC_HEADERS */
+	__db_msgadd_ap(env, mb, fmt, ap);	/* Append to this thread's buffer. */
+	va_end(ap);
+#endif	/* HAVE_ERROR_HISTORY */
+	COMPQUIET(env, NULL);
+	COMPQUIET(fmt, NULL);
+}
+
+/*
  * __db_repmsg --
  *	Replication system message routine.
  *
@@ -665,7 +817,7 @@ __db_repmsg(env, fmt, va_alist)
 #endif
 {
 	va_list ap;
-	char buf[2048];		/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
+	char buf[2048];
 
 #ifdef STDC_HEADERS
 	va_start(ap, fmt);
@@ -679,7 +831,7 @@ __db_repmsg(env, fmt, va_alist)
 
 /*
  * __db_msgcall --
- *	Do the message work for callback functions.
+ *	Do the message work for callback functions in DB_REAL_MSG().
  */
 static void
 __db_msgcall(dbenv, fmt, ap)
@@ -687,16 +839,15 @@ __db_msgcall(dbenv, fmt, ap)
 	const char *fmt;
 	va_list ap;
 {
-	char buf[2048];		/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
+	char buf[2048];
 
 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
-
 	dbenv->db_msgcall(dbenv, buf);
 }
 
 /*
  * __db_msgfile --
- *	Do the message work for FILE *s.
+ *	Do the message work for FILE *s in DB_REAL_MSG().
  */
 static void
 __db_msgfile(dbenv, fmt, ap)
@@ -805,6 +956,13 @@ __db_check_txn(dbp, txn, assoc_locker, read_op)
 	if (IS_RECOVERING(env) || F_ISSET(dbp, DB_AM_RECOVER))
 		return (0);
 
+	if (txn != NULL && dbp->blob_threshold &&
+	    F_ISSET(txn, (TXN_READ_UNCOMMITTED | TXN_SNAPSHOT))) {
+	    __db_errx(env, DB_STR("0237",
+"Blob enabled databases do not support DB_READ_UNCOMMITTED and TXN_SNAPSHOT"));
+		return (EINVAL);
+	}
+
 	/*
 	 * Check for common transaction errors:
 	 *	an operation on a handle whose open commit hasn't completed.
@@ -1095,9 +1253,9 @@ __db_space_err(dbp)
 
 /*
  * __db_failed --
- *	Common failed thread  message.
+ *	Common failed thread message, e.g., after it is seen to have crashed.
  *
- * PUBLIC: int __db_failed __P((const ENV *,
+ * PUBLIC: int __db_failed __P((const ENV *,
  * PUBLIC:      const char *, pid_t, db_threadid_t));
  */
 int
@@ -1108,11 +1266,321 @@ __db_failed(env, msg, pid, tid)
 	db_threadid_t tid;
 {
 	DB_ENV *dbenv;
-	char buf[DB_THREADID_STRLEN];
+	int ret;
+	char tidstr[DB_THREADID_STRLEN], failmsg[DB_FAILURE_SYMPTOM_SIZE];
 
 	dbenv = env->dbenv;
+	(void)dbenv->thread_id_string(dbenv, pid, tid, tidstr);
+	ret = USR_ERR(env, DB_RUNRECOVERY);
+	snprintf(failmsg, sizeof(failmsg), DB_STR_A("0113",
+	    "Thread/process %s failed: %s", "%s %s"), tidstr, msg);
+	(void)__env_failure_remember(env, failmsg);
+	__db_errx(env, "%s", failmsg);
+	return (ret);
+}
 
-	__db_errx(env, DB_STR_A("0113", "Thread/process %s failed: %s",
-	    "%s %s"), dbenv->thread_id_string(dbenv, pid, tid, buf),  msg);
-	return (DB_RUNRECOVERY);
+/*
+ * __env_failure_remember --
+ *	If this failure of a process in the environment is about to set panic
+ *	for the first time, record that a crashed thread was the culprit. Do
+ *	nothing if panic is already set or there is no region. No mutexes are
+ *	taken, to avoid hanging on any crashed threads. Always returns 0.
+ *
+ * PUBLIC: int __env_failure_remember __P((const ENV *, const char *));
+ */
+int
+__env_failure_remember(env, reason)
+	const ENV *env;
+	const char *reason;
+{
+	REGENV *renv;
+
+	renv = (env->reginfo == NULL) ? NULL : env->reginfo->primary;
+	if (renv == NULL || renv->panic || renv->failure_panic)
+		return (0);
+	renv->failure_panic = 1;
+	if (renv->failure_symptom[0] == '\0') {	/* Keep the first symptom. */
+		(void)strncpy(renv->failure_symptom,
+		    reason, sizeof(renv->failure_symptom));
+		renv->failure_symptom[sizeof(renv->failure_symptom) - 1] = '\0';
+	}
+	return (0);
+}
+
+#if defined(HAVE_ERROR_HISTORY)
+/*
+ * __db_deferred_free --
+ *	The destructor registered for DB_GLOBAL(msgs_key): thread exit calls
+ *	this to release the thread's deferred DB_MSGBUF and its buffer.
+ */
+static void
+__db_deferred_free(void *p)
+{
+	DB_MSGBUF *mb;
+
+	if ((mb = p) != NULL) {
+		(void)pthread_setspecific(DB_GLOBAL(msgs_key), NULL);
+		if (mb->buf != NULL)
+			__os_free(NULL, mb->buf);
+		free(mb);	/* It came from calloc(), not __os_malloc(). */
+	}
+}
+
+/*
+ * __db_thread_once_func --
+ *	Pthread_once() callback: create msgs_key with its destructor.
+ */
+static void
+__db_thread_once_func()
+{
+	(void)pthread_key_create(&DB_GLOBAL(msgs_key), __db_deferred_free);
+}
+
+/*
+ * __db_thread_init --
+ *	Initialization hook to be called at least once per process, before
+ *	deferring any messages.
+ *
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: void __db_thread_init __P((void));
+ * PUBLIC: #endif
+ */
+void
+__db_thread_init()
+{
+	/*
+	 * Assign the thread-local storage identifier, once per process. Tell
+	 * thread exit to clean up with __db_deferred_free().
+	 */
+	(void)pthread_once(&DB_GLOBAL(thread_once), __db_thread_once_func);
+}
+
+/*
+ * __db_diags --
+ *	Save the context which triggers the "first notice" of an error code;
+ *	i.e., its creation. It doesn't touch anything when err == 0.
+ *	Always returns err unchanged.
+ *
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: int __db_diags __P((const ENV *, int));
+ * PUBLIC: #endif
+ */
+ int
+__db_diags(env, err)
+	const ENV *env;
+	int err;
+{
+	DB_MSGBUF *mb;
+
+	if (err != 0 && (mb = __db_deferred_get()) != NULL)
+		(void)__db_remember_context(env, mb, err);
+	return (err);
+}
+
+/*
+ * __db_deferred_get --
+ *	Get this thread's deferred DB_MSGBUF, or NULL; allocated on first use.
+ *
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: DB_MSGBUF *__db_deferred_get __P((void));
+ * PUBLIC: #endif
+ */
+DB_MSGBUF *
+__db_deferred_get()
+{
+	DB_MSGBUF *mb;
+
+	if ((mb = pthread_getspecific(DB_GLOBAL(msgs_key))) == NULL) {
+		if ((mb = calloc(1, sizeof(*mb))) != NULL)
+			if (pthread_setspecific(DB_GLOBAL(msgs_key), mb) != 0) {
+				/* Nothing else is safe to do on an error. */
+				free(mb);
+				mb = NULL;
+			}
+	}
+	return (mb);	/* NULL if it could not be allocated or stored. */
+}
+
+/*
+ * __db_deferred_discard --
+ *	Discard any saved-up deferred messages, e.g., at the end of the command.
+ *
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: void __db_deferred_discard __P((void));
+ * PUBLIC: #endif
+ */
+void
+__db_deferred_discard()
+{
+	DB_MSGBUF *mb;
+
+	if ((mb = pthread_getspecific(DB_GLOBAL(msgs_key))) != NULL)
+		mb->cur = mb->buf;	/* Rewind; the buffer itself is kept. */
+}
+
+/*
+ * __db_remember_context --
+ *	Save the context which triggers the "first notice" of an error code;
+ *	i.e., its creation. Include the time, thread, error string, the lsn
+ *	(if logging) and a recent portion of the stack. Add replication info?
+ *	NOTE(review): the size limit tests mb->len (capacity), not bytes used.
+ *	Currently always returns 0; return the error number passed in instead?
+ *
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: int __db_remember_context __P((const ENV *, DB_MSGBUF *, int));
+ * PUBLIC: #endif
+ */
+ int
+ __db_remember_context(env, mb, err)
+	const ENV *env;
+	DB_MSGBUF *mb;
+	int err;
+{
+	DB_ENV *dbenv;
+	LOG *lp;
+	db_timespec now;
+	pid_t pid;
+	db_threadid_t tid;
+	char threadid[DB_THREADID_STRLEN], timestr[CTIME_BUFLEN];
+
+	/* Limit the amount of context messages which are remembered. */
+	if (mb->len >= DB_ERROR_HISTORY_SIZE)
+		return (0);
+
+	lp = NULL;
+	if (env == NULL) {
+		dbenv = NULL;
+		threadid[0] = '\0';	/* No env: print an empty thread id. */
+	} else {
+		dbenv = env->dbenv;
+		dbenv->thread_id(dbenv, &pid, &tid);
+		(void)dbenv->thread_id_string(dbenv, pid, tid, threadid);
+		if (LOGGING_ON(env) && !IS_RECOVERING(env))
+			lp = env->lg_handle->reginfo.primary;
+	}
+
+	__os_gettime(env, &now, 0);
+	(void)__db_ctimespec(&now, timestr);
+	__db_msgadd(env, mb, "\n[%s][%s] %s",
+	    timestr, threadid, db_strerror(err));
+	if (lp != NULL)
+		__db_msgadd(env, mb, " lsn [%lu][%lu]",
+		    (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
+
+#if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS)
+	/*
+	 * Add many frames of stack trace to the record, skipping the first two
+	 * frames: __os_stack_msgadd() and __db_remember_context().
+	 */
+	__db_msgadd(env, mb, " from\n");
+	__os_stack_msgadd(env, mb, 15, 2, NULL);
+#endif	/* HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS */
+
+	return (0);
+}
+#endif
+
+/*
+ * __db_ctimespec --
+ *	Format a timespec in microseconds, similar to a terse __os_ctime(),
+ *	storing the results into a CTIME_BUFLEN sized buffer.
+ *	The result format depends on the availability of localtime, etc.
+ *		MM/DD HH:MM:SS.uuuuuu	if strftime is available, or
+ *		Jan DD HH:MM:SS.uuuuuu	if only __os_ctime() is available.
+ *	Both are small enough to use __os_ctime() sized buffer, e.g. 26.
+ *	The other fields (year, day-of-week, ...) are intentionally removed.
+ *
+ * PUBLIC: char * __db_ctimespec __P((const db_timespec *, char *));
+ */
+char *
+__db_ctimespec(timespec, buf)
+	const db_timespec *timespec;
+	char *buf;
+{
+	char *d, date[CTIME_BUFLEN];
+#ifdef HAVE_STRFTIME
+	struct tm *tm_p;
+#ifdef HAVE_LOCALTIME_R
+	struct tm tm;
+#endif
+#endif
+
+	/* Use strftime() when possible; else trim __os_ctime()'s output. */
+#ifdef HAVE_STRFTIME
+#ifdef HAVE_LOCALTIME_R
+	tm_p = localtime_r(&timespec->tv_sec, &tm);
+#else
+	tm_p = localtime(&timespec->tv_sec);
+#endif
+	if (tm_p != NULL) {
+		d = date;
+		(void)strftime(d, sizeof(date), DB_GLOBAL(time_format), tm_p);
+	}
+	else
+#endif
+	{
+		/* Skip the day-of-week; cut just after the seconds field. */
+		d = __os_ctime(&timespec->tv_sec, date) + 4;
+		d[sizeof("Jan 01 00:00:00") - 1] = '\0';
+	}
+	(void)snprintf(buf, CTIME_BUFLEN,
+	    "%s.%06lu", d, (u_long)(timespec->tv_nsec / NS_PER_US));
+	buf[CTIME_BUFLEN - 1] = '\0';	/* In case of buggy snprintf. */
+	return (buf);
+}
+
+/*
+ * __db_fmt_quote --
+ *	Copy a printf format string, quoting (doubling) each '%' along the way.
+ *	Use this when inserting a user-defined string into a *printf format.
+ *	If src is NULL, then quote dest in-place, shifting the rest of the
+ *	string up by one byte per quote. Output is truncated to fit destsize.
+ *
+ * PUBLIC: char *__db_fmt_quote __P((char *, size_t, const char *));
+ */
+char *
+__db_fmt_quote(dest, destsize, src)
+	char *dest;
+	size_t destsize;
+	const char *src;
+{
+	char *d, *end;
+	const char *s;
+	size_t len;
+
+	/* Stop early enough so that dest always has room for a '\0'. */
+	end = dest + destsize - 1;
+	if (src == NULL) {
+		d = dest;
+		while ((d = strchr(d, '%')) != NULL && d[1] != '\0') {
+			/*
+			 * Shift the rest of the string, including its '\0',
+			 * up by one byte; starting the copy at d doubles the
+			 * '%'. Clamp the copy so that it never passes end.
+			 */
+			len = strlen(d) + 1;
+			if (d + len > end)
+				len = (size_t)(end - d);
+			memmove(d + 1, d, len);
+			/*
+			 * We're done if the string now fills the reserved
+			 * size; else advance over both '%'s.
+			 */
+			if (d + len >= end) {
+				*end = '\0';
+				break;
+			}
+			d += 2;
+		}
+	} else {
+		for (s = src, d = dest; *s != '\0' && d < end; d++, s++)
+			if ((*d = *s) == '%') {
+				/* Drop a '%' which ends src or won't fit. */
+				if (s[1] == '\0' || d + 1 >= end)
+					break;
+				*++d = '%';
+			}
+		*d = '\0';
+	}
+	return (dest);
 }
diff --git a/src/common/db_getlong.c b/src/common/db_getlong.c
index cac55a0e..2dca6891 100644
--- a/src/common/db_getlong.c
+++ b/src/common/db_getlong.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/db_idspace.c b/src/common/db_idspace.c
index a9cbb1bf..4ac18e42 100644
--- a/src/common/db_idspace.c
+++ b/src/common/db_idspace.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/db_log2.c b/src/common/db_log2.c
index 9c929f84..42eb7e3a 100644
--- a/src/common/db_log2.c
+++ b/src/common/db_log2.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
diff --git a/src/common/db_shash.c b/src/common/db_shash.c
index a056e4b1..df862c04 100644
--- a/src/common/db_shash.c
+++ b/src/common/db_shash.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/dbt.c b/src/common/dbt.c
index 90409f2c..4a9970d9 100644
--- a/src/common/dbt.c
+++ b/src/common/dbt.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/mkpath.c b/src/common/mkpath.c
index c684692c..163dbfba 100644
--- a/src/common/mkpath.c
+++ b/src/common/mkpath.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/openflags.c b/src/common/openflags.c
index cec1f081..91d6e51b 100644
--- a/src/common/openflags.c
+++ b/src/common/openflags.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/os_method.c b/src/common/os_method.c
index 1ee06d7a..34627d59 100644
--- a/src/common/os_method.c
+++ b/src/common/os_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/util_arg.c b/src/common/util_arg.c
index 73416cb7..f5db1831 100644
--- a/src/common/util_arg.c
+++ b/src/common/util_arg.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/util_cache.c b/src/common/util_cache.c
index 1206940b..f0bc398d 100644
--- a/src/common/util_cache.c
+++ b/src/common/util_cache.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/util_log.c b/src/common/util_log.c
index d158d3f0..ffe69394 100644
--- a/src/common/util_log.c
+++ b/src/common/util_log.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/util_sig.c b/src/common/util_sig.c
index 02a0fcb2..b159cc80 100644
--- a/src/common/util_sig.c
+++ b/src/common/util_sig.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/common/zerofill.c b/src/common/zerofill.c
index 37662ddc..09d0dafe 100644
--- a/src/common/zerofill.c
+++ b/src/common/zerofill.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/crypto/aes_method.c b/src/crypto/aes_method.c
index 47193539..fed98f2b 100644
--- a/src/crypto/aes_method.c
+++ b/src/crypto/aes_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * Some parts of this code originally written by Adam Stubblefield,
  * -- astubble@rice.edu.
diff --git a/src/crypto/crypto.c b/src/crypto/crypto.c
index b731496f..ba115dd3 100644
--- a/src/crypto/crypto.c
+++ b/src/crypto/crypto.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * Some parts of this code originally written by Adam Stubblefield
  * -- astubble@rice.edu
@@ -15,6 +15,8 @@
 #include "dbinc/db_page.h"
 #include "dbinc/crypto.h"
 
+static void randomize __P((ENV *, void *, size_t));
+
 /*
  * __crypto_region_init --
  *	Initialize crypto.
@@ -110,7 +112,7 @@ __crypto_region_init(env)
 	 * existing one, we are done with the passwd in the env.  We smash
 	 * N-1 bytes so that we don't overwrite the nul.
 	 */
-	memset(dbenv->passwd, 0xff, dbenv->passwd_len-1);
+	randomize(env, dbenv->passwd, dbenv->passwd_len - 1);
 	__os_free(env, dbenv->passwd);
 	dbenv->passwd = NULL;
 	dbenv->passwd_len = 0;
@@ -135,9 +137,10 @@ __crypto_env_close(env)
 	dbenv = env->dbenv;
 
 	if (dbenv->passwd != NULL) {
-		memset(dbenv->passwd, 0xff, dbenv->passwd_len-1);
+		randomize(env, dbenv->passwd, dbenv->passwd_len - 1);
 		__os_free(env, dbenv->passwd);
 		dbenv->passwd = NULL;
+		dbenv->passwd_len = 0;
 	}
 
 	if (!CRYPTO_ON(env))
@@ -225,7 +228,8 @@ __crypto_algsetup(env, db_cipher, alg, do_init)
 
 /*
  * __crypto_decrypt_meta --
- *	Perform decryption on a metapage if needed.
+ *	Perform decryption on a possible metadata page, if needed. This is used
+ *	to help decide whether this is a real DB. Don't trust random data.
  *
  * PUBLIC:  int __crypto_decrypt_meta __P((ENV *, DB *, u_int8_t *, int));
  */
@@ -241,6 +245,7 @@ __crypto_decrypt_meta(env, dbp, mbuf, do_metachk)
 	DB_CIPHER *db_cipher;
 	size_t pg_off;
 	int ret;
+	unsigned added_flags;
 	u_int8_t *iv;
 
 	/*
@@ -293,6 +298,7 @@ __crypto_decrypt_meta(env, dbp, mbuf, do_metachk)
 	 */
 	if (meta->encrypt_alg != 0) {
 		db_cipher = env->crypto_handle;
+		added_flags = 0;
 		if (!F_ISSET(dbp, DB_AM_ENCRYPT)) {
 			if (!CRYPTO_ON(env)) {
 				__db_errx(env, DB_STR("0178",
@@ -300,12 +306,14 @@ __crypto_decrypt_meta(env, dbp, mbuf, do_metachk)
 				return (EINVAL);
 			}
 			/*
-			 * User has a correct, secure env, but has encountered
-			 * a database in that env that is secure, but user
-			 * didn't dbp->set_flags.  Since it is existing, use
-			 * encryption if it is that way already.
+			 * User has a correct, secure env and has encountered
+			 * a database in that env that APPEARS TO BE secure, but
+			 * user didn't set the encryption flags. Since the db
+			 * already exists, turn encryption on. Remember what was
+			 * set so the flags can be restored if decryption fails.
 			 */
-			F_SET(dbp, DB_AM_ENCRYPT|DB_AM_CHKSUM);
+			added_flags = DB_AM_ENCRYPT | DB_AM_CHKSUM;
+			F_SET(dbp, added_flags);
 		}
 		/*
 		 * This was checked in set_flags when DB_AM_ENCRYPT was set.
@@ -316,6 +324,7 @@ __crypto_decrypt_meta(env, dbp, mbuf, do_metachk)
 		    meta->encrypt_alg != db_cipher->alg) {
 			__db_errx(env, DB_STR("0179",
 			    "Database encrypted using a different algorithm"));
+			F_CLR(dbp, added_flags);
 			return (EINVAL);
 		}
 		DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM));
@@ -334,12 +343,14 @@ alg_retry:
 		if (!F_ISSET(db_cipher, CIPHER_ANY)) {
 			if (do_metachk && (ret = db_cipher->decrypt(env,
 			    db_cipher->data, iv, mbuf + pg_off,
-			    DBMETASIZE - pg_off)))
+			    DBMETASIZE - pg_off))) {
+				F_CLR(dbp, added_flags);
 				return (ret);
-			if (((BTMETA *)meta)->crypto_magic !=
-			    meta->magic) {
+			}
+			if (((BTMETA *)meta)->crypto_magic != meta->magic) {
 				__db_errx(env, DB_STR("0180",
 				    "Invalid password"));
+				F_CLR(dbp, added_flags);
 				return (EINVAL);
 			}
 			/*
@@ -409,3 +420,45 @@ __crypto_set_passwd(env_src, env_dest)
 	sh_passwd = R_ADDR(infop, cipher->passwd);
 	return (__env_set_encrypt(env_dest->dbenv, sh_passwd, DB_ENCRYPT_AES));
 }
+
+/*
+ * randomize --
+ *	Overwrite size bytes at base with random values; base[size] is kept.
+ */
+static void
+randomize(env, base, size)
+	ENV *env;
+	void *base;
+	size_t size;
+{
+	size_t i, copysize;
+	u_int8_t  last, *p;
+	u_int32_t value;
+
+	last = ((u_int8_t *)base)[size];
+	for (i = 0, p = base; i < size; i += copysize, p += copysize) {
+		value = __os_random();
+		if ((copysize = (size - i)) > sizeof(int32_t))
+			copysize = sizeof(int32_t);
+		switch (copysize)
+		{
+		default:
+			memmove(p, &value, sizeof(int32_t));
+			break;
+		case 3:
+			p[2] = (u_int8_t)(value >> 16);
+			/* FALLTHROUGH */
+		case 2:
+			p[1] = (u_int8_t)(value >> 8);
+			/* FALLTHROUGH */
+		case 1:
+			p[0] = (u_int8_t)(value);
+			break;
+		case 0:	/* Not reachable while i < size. */
+			DB_ASSERT(env, copysize != 0);
+			break;
+		}
+	}
+	/* The byte following the region must not have been touched. */
+	DB_ASSERT(env, last == *p);
+}
diff --git a/src/crypto/mersenne/mt19937db.c b/src/crypto/mersenne/mt19937db.c
index 2d53c312..0460b994 100644
--- a/src/crypto/mersenne/mt19937db.c
+++ b/src/crypto/mersenne/mt19937db.c
@@ -156,7 +156,7 @@ __db_genrand(env)
 		 * function will return 4 bytes if we don't send in a key.
 		 */
 		do {
-			__os_gettime(env, &ts, 1);
+			__os_gettime(env, &ts, 0);
 			__db_chksum(NULL, (u_int8_t *)&ts.tv_sec,
 			    sizeof(ts.tv_sec), NULL, (u_int8_t *)&seed);
 		} while (seed == 0);
diff --git a/src/crypto/rijndael/rijndael-api-fst.c b/src/crypto/rijndael/rijndael-api-fst.c
index 3fd6489d..5d67937c 100644
--- a/src/crypto/rijndael/rijndael-api-fst.c
+++ b/src/crypto/rijndael/rijndael-api-fst.c
@@ -56,7 +56,7 @@ __db_makeKey(key, direction, keyLen, keyMaterial)
 {
 	u8 cipherKey[MAXKB];
 
-	if (key == NULL) {
+	if (key == NULL || keyMaterial == NULL) {
 		return BAD_KEY_INSTANCE;
 	}
 
@@ -72,9 +72,7 @@ __db_makeKey(key, direction, keyLen, keyMaterial)
 		return BAD_KEY_MAT;
 	}
 
-	if (keyMaterial != NULL) {
-		memcpy(cipherKey, keyMaterial, key->keyLen/8);
-	}
+	memcpy(cipherKey, keyMaterial, key->keyLen/8);
 
 	if (direction == DIR_ENCRYPT) {
 		key->Nr = __db_rijndaelKeySetupEnc(key->rk, cipherKey, keyLen);
diff --git a/src/db/crdel.src b/src/db/crdel.src
index 70473899..a1cbc0ed 100644
--- a/src/db/crdel.src
+++ b/src/db/crdel.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/crdel_rec.c b/src/db/crdel_rec.c
index 08e7bae8..2c529627 100644
--- a/src/db/crdel_rec.c
+++ b/src/db/crdel_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -81,7 +81,7 @@ __crdel_metasub_recover(env, dbtp, lsnp, op, info)
 		/*
 		 * If this was an in-memory database and we are re-creating
 		 * and this is the meta-data page, then we need to set up a
-		 * bunch of fields in the dbo as well.
+		 * bunch of fields in the dbp as well.
 		 */
 		if (F_ISSET(file_dbp, DB_AM_INMEM) &&
 		    argp->pgno == PGNO_BASE_MD &&
diff --git a/src/db/db.c b/src/db/db.c
index 0d9d1e6e..ffeb6d2b 100644
--- a/src/db/db.c
+++ b/src/db/db.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -41,6 +41,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc_auto/sequence_ext.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_swap.h"
 #include "dbinc/btree.h"
@@ -92,6 +93,9 @@ __db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
 	if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0)
 		return (ret);
 
+	/* Set the creation directory. */
+	dbp->dirname = subdbp->dirname;
+
 	/*
 	 * It's always a btree.
 	 * Run in the transaction we've created.
@@ -105,6 +109,20 @@ __db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
 	    DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE));
 
 	/*
+	 * If creating the master database, disable blobs, but assign it a
+	 * blob file id if blobs are enabled in the subdatabase.  This means
+	 * that subdatabases can only support blobs if the first subdatabase
+	 * supports blobs.  This is a temporary restriction, but is needed at
+	 * the moment to prevent an infinite loop.
+	 */
+	dbp->blob_threshold = 0;
+	if (LF_ISSET(DB_CREATE) && subdbp->blob_threshold != 0) {
+		if ((ret = __blob_generate_dir_ids(
+		    dbp, txn, &dbp->blob_file_id)) != 0)
+			return (ret);
+	}
+
+	/*
 	 * If there was a subdb specified, then we only want to apply
 	 * DB_EXCL to the subdb, not the actual file.  We only got here
 	 * because there was a subdb specified.
@@ -819,6 +837,21 @@ __db_refresh(dbp, txn, flags, deferred_closep, reuse)
 	if (dbp->mpf == NULL)
 		LF_SET(DB_NOSYNC);
 
+#ifdef HAVE_64BIT_TYPES
+	/* Close the blob meta data databases. */
+	if (dbp->blob_seq != NULL) {
+		if ((t_ret = __seq_close(dbp->blob_seq, 0)) != 0 && ret == 0)
+			ret = t_ret;
+		dbp->blob_seq = NULL;
+	}
+	if (dbp->blob_meta_db != NULL) {
+		if ((t_ret = __db_close(
+		    dbp->blob_meta_db, NULL, 0)) != 0 && ret == 0)
+			ret = t_ret;
+		dbp->blob_meta_db = NULL;
+	}
+#endif
+
 	/* If never opened, or not currently open, it's easy. */
 	if (!F_ISSET(dbp, DB_AM_OPEN_CALLED))
 		goto never_opened;
@@ -1164,6 +1197,10 @@ never_opened:
 		__os_free(dbp->env, dbp->dname);
 		dbp->dname = NULL;
 	}
+	if (dbp->blob_sub_dir != NULL) {
+		__os_free(dbp->env, dbp->blob_sub_dir);
+		dbp->blob_sub_dir = NULL;
+	}
 
 	/* Discard any memory used to store returned data. */
 	if (dbp->my_rskey.data != NULL)
@@ -1235,8 +1272,11 @@ __db_disassociate(sdbp)
 	sdbp->s_refcnt = 0;
 
 	while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
-		if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0)
-			ret = t_ret;
+		if ((t_ret = __dbc_destroy(dbc)) != 0) {
+			if (ret == 0)
+				ret = t_ret;
+			break;
+		}
 
 	F_CLR(sdbp, DB_AM_SECONDARY);
 	return (ret);
diff --git a/src/db/db.src b/src/db/db.src
index 879c7856..4a90ac16 100644
--- a/src/db/db.src
+++ b/src/db/db.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_am.c b/src/db/db_am.c
index 1cf3a505..84bb04bb 100644
--- a/src/db/db_am.c
+++ b/src/db/db_am.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -205,6 +205,7 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
 	/* Refresh the DBC structure. */
 	dbc->dbtype = dbtype;
 	RESET_RET_MEM(dbc);
+	dbc->db_stream = __dbc_db_stream;
 	dbc->set_priority = __dbc_set_priority;
 	dbc->get_priority = __dbc_get_priority;
 	dbc->priority = dbp->priority;
@@ -314,11 +315,11 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
 	if (F2_ISSET(dbp, DB2_AM_EXCL)) {
 		F_SET(dbc, DBC_DONTLOCK);
 		if (IS_REAL_TXN(txn)&& !LF_ISSET(DBC_OPD | DBC_DUPLICATE)) {
-			/* 
-			 * Exclusive databases can only have one active 
-			 * transaction at a time since there are no internal 
+			/*
+			 * Exclusive databases can only have one active
+			 * transaction at a time since there are no internal
 			 * locks to prevent one transaction from reading and
-			 * writing another's uncommitted changes. 
+			 * writing another's uncommitted changes.
 			 */
 			if (dbp->cur_txn != NULL && dbp->cur_txn != txn) {
 			    __db_errx(env, DB_STR("0749",
@@ -332,7 +333,7 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
 				memset(&req, 0, sizeof(req));
 				req.lock = dbp->handle_lock;
 				req.op = DB_LOCK_TRADE;
-				if ((ret = __lock_vec(env, txn->locker, 0, 
+				if ((ret = __lock_vec(env, txn->locker, 0,
 				    &req, 1, 0)) != 0)
 					goto err;
 				dbp->cur_txn = txn;
@@ -397,10 +398,11 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
 	if (ip != NULL) {
 		dbc->thread_info = ip;
 #ifdef DIAGNOSTIC
-		if (dbc->locker != NULL)
+		if (dbc->locker != NULL) {
+			dbc->locker->prev_locker = ip->dbth_locker;
 			ip->dbth_locker =
 			    R_OFFSET(&(env->lk_handle->reginfo), dbc->locker);
-		else
+		} else
 			ip->dbth_locker = INVALID_ROFF;
 #endif
 	} else if (txn != NULL)
diff --git a/src/db/db_backup.c b/src/db/db_backup.c
index 66d7382a..1c72e4d7 100644
--- a/src/db/db_backup.c
+++ b/src/db/db_backup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -24,8 +24,9 @@ static int backup_read_data_dir
     __P((DB_ENV *, DB_THREAD_INFO *, const char *, const char *, u_int32_t));
 static int backup_dir_clean
     __P((DB_ENV *, const char *, const char *, int *, u_int32_t));
-static int backup_data_copy
-    __P((DB_ENV *, const char *, const char *, const char *, int));
+static int backup_lgconf_chk __P((DB_ENV *));
+static int __db_backup
+    __P((DB_ENV *, const char *, DB_THREAD_INFO *, int, u_int32_t));
 
 /*
  * __db_dbbackup_pp --
@@ -47,9 +48,9 @@ __db_dbbackup_pp(dbenv, dbfile, target, flags)
 	    "DB_ENV->dbbackup", flags, DB_EXCL)) != 0)
 		return (ret);
 	ENV_ENTER(dbenv->env, ip);
-
-	ret = __db_dbbackup(dbenv, ip, dbfile, target, flags);
-
+	REPLICATION_WRAP(dbenv->env,
+	    (__db_dbbackup(
+	    dbenv, ip, dbfile, target, flags, 0, NULL)), 0, ret);
 	ENV_LEAVE(dbenv->env, ip);
 	return (ret);
 }
@@ -58,15 +59,17 @@ __db_dbbackup_pp(dbenv, dbfile, target, flags)
  * __db_dbbackup --
  *	Copy a database file coordinated with mpool.
  *
- * PUBLIC: int __db_dbbackup __P((DB_ENV *, DB_THREAD_INFO *,
- * PUBLIC:     const char *, const char *, u_int32_t));
+ * PUBLIC: int __db_dbbackup __P((DB_ENV *, DB_THREAD_INFO *, const char *,
+ * PUBLIC:     const char *, u_int32_t, u_int32_t, const char *));
  */
 int
-__db_dbbackup(dbenv, ip, dbfile, target, flags)
+__db_dbbackup(dbenv, ip, dbfile, target, flags, oflags, full_path)
 	DB_ENV *dbenv;
 	DB_THREAD_INFO *ip;
 	const char *dbfile, *target;
 	u_int32_t flags;
+	u_int32_t oflags;
+	const char *full_path;
 {
 	DB *dbp;
 	DB_FH *fp;
@@ -77,8 +80,8 @@ __db_dbbackup(dbenv, ip, dbfile, target, flags)
 	retry_count = 0;
 
 retry:	if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
-	    (ret = __db_open(dbp, ip, NULL, dbfile, NULL,
-	    DB_UNKNOWN, DB_AUTO_COMMIT | DB_RDONLY, 0, PGNO_BASE_MD)) != 0) {
+	    (ret = __db_open(dbp, ip, NULL, dbfile, NULL, DB_UNKNOWN,
+	    DB_AUTO_COMMIT | DB_RDONLY | oflags, 0, PGNO_BASE_MD)) != 0) {
 		if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) {
 			(void)__db_close(dbp, NULL, DB_NOSYNC);
 			dbp = NULL;
@@ -91,9 +94,16 @@ retry:	if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
 		}
 	}
 
+	/* Hot backup requires DB_LOG_BLOB. */
+	if (ret == 0 && dbp->blob_threshold != 0 &&
+	    (ret = backup_lgconf_chk(dbenv)) != 0)
+		goto err;
+
+	if (full_path == NULL)
+		full_path = dbfile;
 	if (ret == 0) {
 		if ((ret = __memp_backup_open(dbenv->env,
-		    dbp->mpf, dbfile, target, flags, &fp, &handle)) == 0) {
+		    dbp->mpf, full_path, target, flags, &fp, &handle)) == 0) {
 			if (dbp->type == DB_HEAP)
 				ret = __heap_backup(
 				    dbenv, dbp, ip, fp, handle, flags);
@@ -104,10 +114,21 @@ retry:	if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
 				    fp, handle, flags);
 		}
 		if ((t_ret = __memp_backup_close(dbenv->env,
-		    dbp->mpf, dbfile, fp, handle)) != 0 && ret == 0)
+		    dbp->mpf, full_path, fp, handle)) != 0 && ret == 0)
 			ret = t_ret;
 	}
 
+	/*
+	 * Copy blob files.  Since no locking is done here, it is possible
+	 * that a blob file may be copied in the middle of being written.
+	 * This is not a problem since hotbackup requires DB_LOG_BLOB and
+	 * catastrophic recovery, which will fix any inconsistencies in the
+	 * blob files.
+	 */
+	if (ret == 0 && dbp->blob_threshold != 0 &&
+	    (t_ret = __blob_copy_all(dbp, target, flags)) != 0)
+		ret= t_ret;
+
 #ifdef HAVE_QUEUE
 	/*
 	 * For compatibility with the 5.2 and patch versions of db_copy
@@ -117,7 +138,7 @@ retry:	if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
 		ret = __qam_backup_extents(dbp, ip, target, flags);
 #endif
 
-	if (dbp != NULL &&
+err:	if (dbp != NULL &&
 	    (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
 		ret = t_ret;
 
@@ -205,8 +226,11 @@ backup_dir_clean(dbenv, backup_dir, log_dir, remove_maxp, flags)
 /*
  * backup_data_copy --
  *	Copy a non-database file into the backup directory.
+ *
+ * PUBLIC: int backup_data_copy __P((
+ * PUBLIC:	DB_ENV *, const char *, const char *, const char *, int));
  */
-static int
+int
 backup_data_copy(dbenv, file, from_dir, to_dir, log)
 	DB_ENV *dbenv;
 	const char *file, *from_dir, *to_dir;
@@ -352,13 +376,16 @@ backup_read_data_dir(dbenv, ip, dir, backup_dir, flags)
 	ENV *env;
 	FILE *savefile;
 	int fcnt, ret;
-	size_t cnt;
+	size_t cnt, len;
 	const char *bd;
 	char **names, buf[DB_MAXPATHLEN], bbuf[DB_MAXPATHLEN];
+	char fullpath[DB_MAXPATHLEN];
 	void (*savecall) (const DB_ENV *, const char *, const char *);
 
 	env = dbenv->env;
 	memset(bbuf, 0, sizeof(bbuf));
+	memset(fullpath, 0, sizeof(fullpath));
+	len = 0;
 
 	bd = backup_dir;
 	if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) && dir != env->db_home) {
@@ -401,6 +428,12 @@ backup_read_data_dir(dbenv, ip, dir, backup_dir, flags)
 			    "%s: path too long", "%s"), buf);
 			return (EINVAL);
 		}
+		/* Save the original dir. */
+		if (!LF_ISSET(DB_BACKUP_SINGLE_DIR)) {
+			(void)snprintf(fullpath, sizeof(fullpath),
+			    "%s%c%c", dir, PATH_SEPARATOR[0], '\0');
+			len = strlen(fullpath);
+		}
 		dir = buf;
 	}
 	/* Get a list of file names. */
@@ -449,7 +482,16 @@ backup_read_data_dir(dbenv, ip, dir, backup_dir, flags)
 		savefile = dbenv->db_errfile;
 		dbenv->db_errfile = NULL;
 
-		ret = __db_dbbackup(dbenv, ip, names[cnt], bd, flags);
+		/*
+		 * If it is not backing up to a single directory, prefix
+		 * the file with 'dir' so that the file and directory structure
+		 * in the source and backup location will be the same.
+		 */
+		if (len != 0)
+			(void)snprintf(fullpath + len,
+			    sizeof(fullpath) - len, "%s%c", names[cnt], '\0');
+		ret = __db_dbbackup(dbenv, ip, names[cnt],
+		    backup_dir, flags, 0, len != 0 ? fullpath : NULL);
 
 		dbenv->db_errcall = savecall;
 		dbenv->db_errfile = savefile;
@@ -662,21 +704,22 @@ err:	if (logd != dbenv->db_log_dir && logd != env->db_home)
  * __db_backup --
  *	Backup databases in the enviornment.
  *
- * PUBLIC: int __db_backup __P((DB_ENV *, const char *, u_int32_t));
+ * PUBLIC: int __db_backup_pp __P((DB_ENV *, const char *, u_int32_t));
  */
 int
-__db_backup(dbenv, target, flags)
+__db_backup_pp(dbenv, target, flags)
 	DB_ENV *dbenv;
 	const char *target;
 	u_int32_t flags;
 {
 	DB_THREAD_INFO *ip;
 	ENV *env;
-	int copy_min, remove_max, ret;
-	char **dir;
+	u_int32_t bytes;
+	int remove_max, ret;
 
 	env = dbenv->env;
-	remove_max = copy_min = 0;
+	bytes = 0;
+	remove_max = 0;
 
 #undef	OKFLAGS
 #define	OKFLAGS								\
@@ -692,6 +735,11 @@ __db_backup(dbenv, target, flags)
 		return (EINVAL);
 	}
 
+	/* Hot backup requires DB_LOG_BLOB. */
+	if ((ret = __env_get_blob_threshold_int(env, &bytes)) != 0 ||
+	    (bytes != 0 && (ret = backup_lgconf_chk(dbenv)) != 0))
+		return (ret);
+
 	/*
 	 * If the target directory for the backup does not exist, create it
 	 * with mode read-write-execute for the owner.  Ignore errors here,
@@ -714,6 +762,30 @@ __db_backup(dbenv, target, flags)
 	}
 
 	ENV_ENTER(env, ip);
+	REPLICATION_WRAP(env,
+	    (__db_backup(dbenv, target, ip, remove_max, flags)), 0, ret);
+	ENV_LEAVE(env, ip);
+	return (ret);
+}
+
+/*
+ * __db_backup --
+ *	Backup databases in the environment.
+ */
+static int
+__db_backup(dbenv, target, ip, remove_max, flags)
+	DB_ENV *dbenv;
+	const char *target;
+	DB_THREAD_INFO *ip;
+	int remove_max;
+	u_int32_t flags;
+{
+	ENV *env;
+	int copy_min, ret;
+	char **dir;
+
+	env = dbenv->env;
+	copy_min = 0;
 
 	/*
 	 * If the UPDATE option was not specified, copy all database
@@ -724,6 +796,19 @@ __db_backup(dbenv, target, flags)
 		goto end;
 	F_SET(dbenv, DB_ENV_HOTBACKUP);
 	if (!LF_ISSET(DB_BACKUP_UPDATE)) {
+		/*
+		 * Don't allow absolute path of blob directory when
+		 * it is not backing up to a single directory.
+		 */
+		if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) &&
+		    dbenv->db_blob_dir != NULL &&
+		    __os_abspath(dbenv->db_blob_dir)) {
+			__db_errx(env, DB_STR_A("0780",
+"blob directory '%s' is absolute path, not permitted unless backup is to a single directory",
+			"%s"), dbenv->db_blob_dir);
+			ret = EINVAL;
+			goto err;
+		}
 		if ((ret = backup_read_data_dir(dbenv,
 		    ip, env->db_home, target, flags)) != 0)
 			goto err;
@@ -734,8 +819,8 @@ __db_backup(dbenv, target, flags)
 			 * enviroment  -- running recovery with them would
 			 * corrupt the source files.
 			 */
-			if (!LF_ISSET(DB_BACKUP_SINGLE_DIR)
-			   && __os_abspath(*dir)) {
+			if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) &&
+			    __os_abspath(*dir)) {
 				__db_errx(env, DB_STR_A("0725",
 "data directory '%s' is absolute path, not permitted unless backup is to a single directory",
 				    "%s"), *dir);
@@ -751,7 +836,17 @@ __db_backup(dbenv, target, flags)
 	/*
 	 * Copy all log files found in the log directory.
 	 * The log directory defaults to the home directory.
+	 * Don't allow absolute path of log directory when
+	 * it is not backing up to a single directory.
 	 */
+	if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) &&
+	    dbenv->db_log_dir != NULL && __os_abspath(dbenv->db_log_dir)) {
+		__db_errx(env, DB_STR_A("0781",
+"log directory '%s' is absolute path, not permitted unless backup is to a single directory",
+		    "%s"), dbenv->db_log_dir);
+		ret = EINVAL;
+		goto err;
+	}
 	if ((ret = backup_read_log_dir(dbenv, target, &copy_min, flags)) != 0)
 		goto err;
 	/*
@@ -761,7 +856,7 @@ __db_backup(dbenv, target, flags)
 	 * cleanup.
 	 */
 	if (LF_ISSET(DB_BACKUP_UPDATE) && remove_max < copy_min &&
-	     !(remove_max == 0 && copy_min == 1)) {
+	    remove_max != 0 && copy_min != 1) {
 		__db_errx(env, DB_STR_A("0743",
 "the largest log file removed (%d) must be greater than or equal the smallest log file copied (%d)",
 		    "%d %d"), remove_max, copy_min);
@@ -770,6 +865,28 @@ __db_backup(dbenv, target, flags)
 
 err:	F_CLR(dbenv, DB_ENV_HOTBACKUP);
 	(void)__env_set_backup(env, 0);
-end:	ENV_LEAVE(env, ip);
+end:	return (ret);
+}
+
+/*
+ * backup_lgconf_chk --
+ *	Fail unless DB_LOG_BLOB is configured (only checked if logging is on).
+ */
+static int
+backup_lgconf_chk(dbenv)
+	DB_ENV *dbenv;
+{
+	int lgconf, ret;
+
+	ret = 0;
+	/* A failed config lookup is reported like a missing DB_LOG_BLOB. */
+	if (LOGGING_ON(dbenv->env) && ((ret = __log_get_config(dbenv,
+	    DB_LOG_BLOB, &lgconf)) != 0 || lgconf == 0)) {
+		__db_errx(dbenv->env, DB_STR("0782",
+		    "Hot backup requires DB_LOG_BLOB"));
+		if (ret == 0)
+			ret = EINVAL;
+	}
+
+	return (ret);
+}
diff --git a/src/db/db_cam.c b/src/db/db_cam.c
index 6ee8b579..1a330bdb 100644
--- a/src/db/db_cam.c
+++ b/src/db/db_cam.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -11,6 +11,7 @@
 #include "db_int.h"
 #include "dbinc/db_page.h"
 #include "dbinc/btree.h"
+#include "dbinc/fop.h"
 #include "dbinc/hash.h"
 #include "dbinc/heap.h"
 #include "dbinc/lock.h"
@@ -83,6 +84,9 @@ __dbc_close(dbc)
 	DB *dbp;
 	DBC *opd;
 	DBC_INTERNAL *cp;
+#ifdef DIAGNOSTIC
+	DB_THREAD_INFO *ip;
+#endif
 	DB_TXN *txn;
 	ENV *env;
 	int ret, t_ret;
@@ -149,6 +153,14 @@ __dbc_close(dbc)
 			ret = t_ret;
 		F_CLR(dbc, DBC_FAMILY);
 	}
+#ifdef DIAGNOSTIC
+	if (dbc->locker != NULL) {
+		ENV_GET_THREAD_INFO(env, ip);
+		if (ip != NULL)
+			ip->dbth_locker = dbc->locker->prev_locker;
+		dbc->locker->prev_locker = INVALID_ROFF;
+	}
+#endif
 
 	if ((txn = dbc->txn) != NULL)
 		txn->cursors--;
@@ -510,6 +522,305 @@ __dbc_idel(dbc, flags)
 	return (ret);
 }
 
+/*
+ * __dbc_db_stream --
+ *	DBC->db_stream: check the flags, translate them into DB_FOP_* open
+ *	flags and call __db_stream_init with the result.
+ *
+ * PUBLIC: int __dbc_db_stream __P((DBC *, DB_STREAM **, u_int32_t));
+ */
+int
+__dbc_db_stream(dbc, dbsp, flags)
+	DBC *dbc;
+	DB_STREAM **dbsp;
+	u_int32_t flags;
+{
+	ENV *env;
+	int ret;
+	u_int32_t oflags;
+
+	env = dbc->env;
+	oflags = flags;
+
+	if ((ret = __db_fchk(
+	    env, "DBC->db_stream", flags,
+	    DB_STREAM_READ | DB_STREAM_WRITE | DB_STREAM_SYNC_WRITE)) != 0)
+		return (ret);
+	/* A read-only database forces DB_STREAM_READ. */
+	if (DB_IS_READONLY(dbc->dbp)) {
+		LF_SET(DB_STREAM_READ);
+		oflags |= DB_STREAM_READ;
+	}
+	if (LF_ISSET(DB_STREAM_READ) && LF_ISSET(DB_STREAM_WRITE)) {
+		ret = EINVAL;
+		__db_errx(env, DB_STR("0750",
+	    "Error, cannot set both DB_STREAM_WRITE and DB_STREAM_READ."));
+		goto err;
+	}
+	/* Add the DB_FOP_* flags that match the requested stream mode. */
+	if (oflags & DB_STREAM_READ)
+		LF_SET(DB_FOP_READONLY);
+	else
+		LF_SET(DB_FOP_WRITE);
+	if (oflags & DB_STREAM_SYNC_WRITE)
+		LF_SET(DB_FOP_SYNC_WRITE);
+
+	ret = __db_stream_init(dbc, dbsp, flags);
+
+err:	return (ret);
+}
+
+/*
+ * __dbc_get_blob_id --
+ * Returns the blob id stored in the data record to which the cursor currently
+ * points.  Returns EINVAL if the cursor does not point to a blob record, or
+ * if it is not a btree, hash or heap cursor; *blob_id is set only on success.
+ *
+ * PUBLIC: int __dbc_get_blob_id __P((DBC *, db_seq_t *));
+ */
+int
+__dbc_get_blob_id(dbc, blob_id)
+	DBC *dbc;
+	db_seq_t *blob_id;
+{
+	DBT key, data;
+	BBLOB bl;
+	HBLOB hbl;
+	HEAPBLOBHDR bhdr;
+	int ret;
+
+	if (dbc->dbtype != DB_BTREE &&
+	    dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) {
+		return (EINVAL);
+	}
+
+	ret = 0;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	/* Get the blob database record instead of the blob. */
+	data.flags |= DB_DBT_BLOB_REC;
+
+	/*
+	 * It would be great if there was a more efficient way to do this, but
+	 * the complexities of getting a page from a database, especially
+	 * when taking into account things like partitions and compression,
+	 * make that more trouble than it is worth.
+	 */
+	if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0)
+		goto err;
+	/* Check the record size and type tag before reading the id. */
+	switch (dbc->dbtype) {
+	case DB_BTREE:
+		if (data.size != BBLOB_SIZE) {
+			ret = EINVAL;
+			goto err;
+		}
+		memcpy(&bl, data.data, BBLOB_SIZE);
+		if (B_TYPE(bl.type) != B_BLOB) {
+			ret = EINVAL;
+			goto err;
+		}
+		*blob_id = (db_seq_t)bl.id;
+		break;
+	case DB_HEAP:
+		if (data.size != HEAPBLOBREC_SIZE) {
+			ret = EINVAL;
+			goto err;
+		}
+		memcpy(&bhdr, data.data, HEAPBLOBREC_SIZE);
+		if (!F_ISSET(&bhdr.std_hdr, HEAP_RECBLOB)) {
+			ret = EINVAL;
+			goto err;
+		}
+		*blob_id = (db_seq_t)bhdr.id;
+		break;
+	case DB_HASH:
+		if (data.size != HBLOB_SIZE) {
+			ret = EINVAL;
+			goto err;
+		}
+		memcpy(&hbl, data.data, HBLOB_SIZE);
+		if (HPAGE_PTYPE(&hbl) != H_BLOB) {
+			ret = EINVAL;
+			goto err;
+		}
+		*blob_id = (db_seq_t)hbl.id;
+		break;
+	default:
+		ret = EINVAL;
+		goto err;
+	}
+
+err:	return (ret);
+}
+
+/*
+ * __dbc_get_blob_size --
+ *
+ * Returns the blob file size stored in the data record to which the cursor
+ * currently points.  Returns EINVAL if the cursor does not point to a btree,
+ * hash or heap blob record.
+ *
+ * PUBLIC: int __dbc_get_blob_size __P((DBC *, off_t *));
+ */
+int
+__dbc_get_blob_size(dbc, size)
+	DBC *dbc;
+	off_t *size;
+{
+	DBT key, data;
+	ENV *env;
+	BBLOB bl;
+	HBLOB hbl;
+	HEAPBLOBHDR bhdr;
+	int ret;
+
+	if (dbc->dbtype != DB_BTREE &&
+	    dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) {
+		return (EINVAL);
+	}
+
+	env = dbc->env;
+	ret = 0;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	/* Get the blob database record instead of the blob. */
+	data.flags |= DB_DBT_BLOB_REC;
+
+	/*
+	 * It would be great if there was a more efficient way to do this, but
+	 * the complexities of getting a page from a database, especially
+	 * when taking into account things like partitions and compression,
+	 * make that more trouble than it is worth.
+	 */
+	if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0)
+		goto err;
+	/* NOTE(review): GET_BLOB_SIZE is given ret; presumably it can fail. */
+	switch (dbc->dbtype) {
+	case DB_BTREE:
+		if (data.size != BBLOB_SIZE) {
+			ret = EINVAL;
+			goto err;
+		}
+		memcpy(&bl, data.data, BBLOB_SIZE);
+		if (B_TYPE(bl.type) != B_BLOB) {
+			ret = EINVAL;
+			goto err;
+		}
+		GET_BLOB_SIZE(env, bl, *size, ret);
+		break;
+	case DB_HEAP:
+		if (data.size != HEAPBLOBREC_SIZE) {
+			ret = EINVAL;
+			goto err;
+		}
+		memcpy(&bhdr, data.data, HEAPBLOBREC_SIZE);
+		if (!F_ISSET(&bhdr.std_hdr, HEAP_RECBLOB)) {
+			ret = EINVAL;
+			goto err;
+		}
+		GET_BLOB_SIZE(env, bhdr, *size, ret);
+		break;
+	case DB_HASH:
+		if (data.size != HBLOB_SIZE) {
+			ret = EINVAL;
+			goto err;
+		}
+		memcpy(&hbl, data.data, HBLOB_SIZE);
+		if (HPAGE_PTYPE(&hbl) != H_BLOB) {
+			ret = EINVAL;
+			goto err;
+		}
+		GET_BLOB_SIZE(env, hbl, *size, ret);
+		break;
+	default:
+		ret = EINVAL;
+		goto err;
+	}
+
+err:	return (ret);
+}
+
+/*
+ * __dbc_set_blob_size --
+ *
+ * Sets the blob file size in the data record to which the cursor currently
+ * points and writes the record back with DB_CURRENT.  Returns EINVAL if the
+ * cursor does not point to a btree, hash or heap blob record.
+ *
+ * PUBLIC: int __dbc_set_blob_size __P((DBC *, off_t));
+ */
+int
+__dbc_set_blob_size(dbc, size)
+	DBC *dbc;
+	off_t size;
+{
+	DBT key, data;
+	BBLOB *bl;
+	HBLOB *hbl;
+	HEAPBLOBHDR *bhdr;
+	int ret;
+
+	if (dbc->dbtype != DB_BTREE &&
+	    dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) {
+		return (EINVAL);
+	}
+
+	ret = 0;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	/* Get the blob database record instead of the blob. */
+	data.flags |= DB_DBT_BLOB_REC;
+
+	/*
+	 * It would be great if there was a more efficient way to do this, but
+	 * the complexities of getting a page from a database, especially
+	 * when taking into account things like partitions and compression,
+	 * make that more trouble than it is worth.
+	 */
+	if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0)
+		goto err;
+	/* Check the record size before reading the type tag inside it. */
+	switch (dbc->dbtype) {
+	case DB_BTREE:
+		bl = (BBLOB *)data.data;
+		if (bl == NULL ||
+		    data.size != BBLOB_SIZE || B_TYPE(bl->type) != B_BLOB) {
+			ret = EINVAL;
+			goto err;
+		}
+		SET_BLOB_SIZE(bl, size, BBLOB);
+		break;
+	case DB_HEAP:
+		bhdr = (HEAPBLOBHDR *)data.data;
+		if (bhdr == NULL ||
+		    data.size != HEAPBLOBREC_SIZE ||
+		    !F_ISSET(&bhdr->std_hdr, HEAP_RECBLOB)) {
+			ret = EINVAL;
+			goto err;
+		}
+		SET_BLOB_SIZE(bhdr, size, HEAPBLOBHDR);
+		break;
+	case DB_HASH:
+		hbl = data.data;
+		if (hbl == NULL ||
+		    data.size != HBLOB_SIZE || HPAGE_PTYPE(hbl) != H_BLOB) {
+			ret = EINVAL;
+			goto err;
+		}
+		SET_BLOB_SIZE((HBLOB *)hbl, size, HBLOB);
+		break;
+	default:
+		ret = EINVAL;
+		goto err;
+	}
+
+	if ((ret = __dbc_put(dbc, &key, &data, DB_CURRENT)) != 0)
+		goto err;
+
+err:	return (ret);
+}
+
 #ifdef HAVE_COMPRESSION
 /*
  * __dbc_bulk_del --
@@ -632,6 +943,12 @@ __dbc_idup(dbc_orig, dbcp, flags)
 		int_n->stream_off = int_orig->stream_off;
 		int_n->stream_curr_pgno = int_orig->stream_curr_pgno;
 
+#ifdef HAVE_PARTITION
+		if (DB_IS_PARTITIONED(dbp)) {
+			if ((ret = __partc_dup(dbc_orig, dbc_n)) != 0)
+				goto err;
+		} else
+#endif
 		switch (dbc_orig->dbtype) {
 		case DB_QUEUE:
 			if ((ret = __qamc_dup(dbc_orig, dbc_n)) != 0)
@@ -859,7 +1176,11 @@ __dbc_iget(dbc, key, data, flags)
 	 * we acquire a write lock in the primary tree and no locks in the
 	 * off-page dup tree.  If the DB_RMW flag was specified and the get
 	 * operation is done in an off-page duplicate tree, call the primary
-	 * cursor's upgrade routine first.
+	 * cursor's upgrade routine first.  We fetch the primary tree's data
+	 * page to follow the buffer latching order rules for btrees: latch from
+	 * the top of the main tree down, even when also searching OPD trees.
+	 * Deadlocks could otherwise occur if we need to fetch the main page
+	 * while an OPD page is latched. [#22532]
 	 */
 	cp = dbc->internal;
 	if (cp->opd != NULL &&
@@ -868,6 +1189,10 @@ __dbc_iget(dbc, key, data, flags)
 	    flags == DB_PREV || flags == DB_PREV_DUP)) {
 		if (tmp_rmw && (ret = dbc->am_writelock(dbc)) != 0)
 			goto err;
+		if (cp->page == NULL && (ret = __memp_fget(mpf, &cp->pgno,
+		    dbc->thread_info, dbc->txn, 0, &cp->page)) != 0)
+			goto err;
+
 		if (F_ISSET(dbc, DBC_TRANSIENT))
 			opd = cp->opd;
 		else if ((ret = __dbc_idup(cp->opd, &opd, DB_POSITION)) != 0)
@@ -1660,7 +1985,7 @@ __dbc_put_secondaries(dbc,
 				    tskeyp, &oldpkey, rmw | DB_SET);
 				if (ret == 0) {
 					cmp = __bam_defcmp(sdbp,
-					    &oldpkey, pkey);
+					    &oldpkey, pkey, NULL);
 					__os_ufree(env, oldpkey.data);
 					/*
 					 * If the secondary key is unchanged,
@@ -1868,7 +2193,7 @@ __dbc_put_primary(dbc, key, data, flags)
 		olddata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
 		ret = __dbc_get(dbc, key, &olddata, DB_SET);
 		if (ret == 0) {
-			ret = DB_KEYEXIST;
+			ret = DBC_ERR(dbc, DB_KEYEXIST);
 			goto done;
 		} else if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY)
 			goto err;
@@ -2100,7 +2425,7 @@ __dbc_iput(dbc, key, data, flags)
 		if (dbc->dbtype == DB_HASH && F_ISSET(
 		    ((BTREE_CURSOR *)(dbc->internal->opd->internal)),
 		    C_DELETED)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
@@ -2228,7 +2553,7 @@ __dbc_del_oldskey(sdbp, dbc, skey, pkey, olddata)
 		 */
 		for (i = 0, tskeyp = skey; i < nskey; i++, tskeyp++)
 			if (((BTREE *)sdbp->bt_internal)->bt_compare(sdbp,
-			    toldskeyp, tskeyp) == 0) {
+			    toldskeyp, tskeyp, NULL) == 0) {
 				nsame++;
 				F_CLR(tskeyp, DB_DBT_ISSET);
 				break;
@@ -2382,12 +2707,14 @@ __dbc_cleanup(dbc, dbc_n, failed)
 	 * cursors.
 	 */
 	if (!failed && ret == 0) {
+		MUTEX_LOCK(dbp->env, dbp->mutex);
 		if (opd != NULL)
 			opd->internal->pdbc = dbc;
 		if (internal->opd != NULL)
 			internal->opd->internal->pdbc = dbc_n;
 		dbc->internal = dbc_n->internal;
 		dbc_n->internal = internal;
+		MUTEX_UNLOCK(dbp->env, dbp->mutex);
 	}
 
 	/*
@@ -3501,6 +3828,32 @@ __db_check_skeyset(sdbp, skeyp)
 		for (key2 = key1 + 1; key2 < last_key; key2++)
 			DB_ASSERT(env,
 			    ((BTREE *)sdbp->bt_internal)->bt_compare(sdbp,
-			    key1, key2) != 0);
+			    key1, key2, NULL) != 0);
+}
+#endif
+
+#ifdef HAVE_ERROR_HISTORY
+/*
+ * __dbc_diags
+ *	Save the context which triggers the "first notice" of an error code;
+ *	i.e., its creation, plus "DB: <file>:<subdb>" ("in-mem" if no file).
+ *	It doesn't touch anything when err == 0.  Always returns err.
+ * PUBLIC: int __dbc_diags __P((DBC *, int));
+ */
+int
+__dbc_diags(dbc, err)
+	DBC *dbc;
+	int err;
+{
+	DB_MSGBUF *mb;
+
+	if (err != 0 && dbc->env != NULL &&
+	    (mb = __db_deferred_get()) != NULL) {
+		(void)__db_remember_context(dbc->env, mb, err);
+		__db_msgadd(dbc->env, mb, "DB: %s:%s\n",
+		    dbc->dbp->fname == NULL ? "in-mem" : dbc->dbp->fname,
+		    dbc->dbp->dname == NULL ? "" : dbc->dbp->dname);
+	}
+	return (err);
+}
 #endif
diff --git a/src/db/db_cds.c b/src/db/db_cds.c
index 185d5487..d3cc990a 100644
--- a/src/db/db_cds.c
+++ b/src/db/db_cds.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -43,7 +43,15 @@ static int
 __cdsgroup_abort(txn)
 	DB_TXN *txn;
 {
-	return (__cdsgroup_notsup(txn->mgrp->env, "abort"));
+	ENV *env;
+
+	env = txn->mgrp->env;
+	/*
+	 * As the txn handle can not be used any more, we call
+	 * __cdsgroup_commit to release the lock and destroy the handle.
+	 */
+	(void)__cdsgroup_commit(txn, 0);
+	return (__cdsgroup_notsup(env, "abort"));
 }
 
 static int
@@ -83,8 +91,16 @@ static int __cdsgroup_discard(txn, flags)
 	DB_TXN *txn;
 	u_int32_t flags;
 {
+	ENV *env;
+
 	COMPQUIET(flags, 0);
-	return (__cdsgroup_notsup(txn->mgrp->env, "discard"));
+	env = txn->mgrp->env;
+	/*
+	 * As the txn handle can not be used any more, we call
+	 * __cdsgroup_commit to release the lock and destroy the handle.
+	 */
+	(void)__cdsgroup_commit(txn, 0);
+	return (__cdsgroup_notsup(env, "discard"));
 }
 
 static u_int32_t __cdsgroup_id(txn)
diff --git a/src/db/db_compact.c b/src/db/db_compact.c
index d0f4801e..afe5a997 100644
--- a/src/db/db_compact.c
+++ b/src/db/db_compact.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -262,9 +262,11 @@ err:		if (txn_local && txn != NULL) {
 done:	if (LF_ISSET(DB_FREE_SPACE)) {
 		DBMETA *meta;
 		db_pgno_t pgno;
+		int pgs_done;
 
 		pgno = PGNO_BASE_MD;
 		isdone = 1;
+		pgs_done = 0;
 		if (ret == 0 && !LF_ISSET(DB_FREELIST_ONLY) &&
 		    __memp_fget(dbp->mpf, &pgno, ip, txn, 0, &meta) == 0) {
 			isdone = meta->free == PGNO_INVALID;
@@ -281,7 +283,8 @@ done:	if (LF_ISSET(DB_FREE_SPACE)) {
 		} else
 #endif
 		if (!isdone)
-			ret = __bam_truncate_ipages(dbp, ip, txn_orig, c_data);
+			ret = __bam_truncate_ipages(dbp,
+			    ip, txn_orig, c_data, &pgs_done);
 
 		/* Clean up the free list. */
 		if (list != NULL)
@@ -387,17 +390,26 @@ err:	if (dbc != NULL && (t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
 #endif
 
 /*
- * __db_exchange_page -- swap a page with a lower numbered page.
- * The routine will optionally free the higher numbered page.  The cursor
- * has a stack which includes at least the immediate parent of this page.
- * PUBLIC: int __db_exchange_page __P((DBC *, PAGE **, PAGE *, db_pgno_t, int));
+ * __db_exchange_page -- try to move a page 'down', to earlier in the file.
+ *
+ * This tries to move a page to a lower location in the file, by swapping it
+ * with an earlier free page. The free page comes either from the free list or
+ * the newpgno parameter (e.g., __ham_compact_hash()).  If the new page turns
+ * out to be higher than the original one, the allocation is undone and
+ * the caller is left unchanged.  After a successful swap, this routine can
+ * optionally free the old, higher numbered page.
+ * The cursor's stack includes at least the immediate parent of this page.
+ *
+ * PUBLIC: int __db_exchange_page
+ * PUBLIC:    __P((DBC *, PAGE **, PAGE *, db_pgno_t, int, int *));
  */
 int
-__db_exchange_page(dbc, pgp, opg, newpgno, flags)
+__db_exchange_page(dbc, pgp, opg, newpgno, flags, pgs_donep)
 	DBC *dbc;
 	PAGE **pgp, *opg;
 	db_pgno_t newpgno;
 	int flags;
+	int *pgs_donep;
 {
 	BTREE_CURSOR *cp;
 	DB *dbp;
@@ -445,7 +457,9 @@ __db_exchange_page(dbc, pgp, opg, newpgno, flags)
 	 * are allocating at the same time, if so, just put it back.
 	 */
 	if (PGNO(newpage) > PGNO(*pgp)) {
-		/* Its unfortunate but you can't just free a new overflow. */
+		/* It is unfortunate but you can't just free a new overflow. */
+		/* XXX Is the above comment still true? */
+		/* XXX Should __db_new(OVERFLOW) zero OV_LEN()? */
 		if (TYPE(newpage) == P_OVERFLOW)
 			OV_LEN(newpage) = 0;
 		if ((ret = __LPUT(dbc, lock)) != 0)
@@ -572,7 +586,9 @@ __db_exchange_page(dbc, pgp, opg, newpgno, flags)
 	if ((ret = __TLPUT(dbc, lock)) != 0)
 		return (ret);
 
-done:	return (0);
+done:
+	(*pgs_donep)++;
+	return (0);
 
 err:	(void)__memp_fput(dbp->mpf, dbc->thread_info, newpage, dbc->priority);
 	(void)__TLPUT(dbc, lock);
@@ -584,15 +600,16 @@ err:	(void)__memp_fput(dbp->mpf, dbc->thread_info, newpage, dbc->priority);
  *	Walk the pages of an overflow chain and swap out
  * high numbered pages.  We are passed the first page
  * but only deal with the second and subsequent pages.
- * PUBLIC:  int __db_truncate_overflow __P((DBC *,
- * PUBLIC:     db_pgno_t, PAGE **, DB_COMPACT *));
+ * PUBLIC: int __db_truncate_overflow __P((DBC *, db_pgno_t,
+ * PUBLIC:    PAGE **, DB_COMPACT *, int *));
  */
 int
-__db_truncate_overflow(dbc, pgno, ppg, c_data)
+__db_truncate_overflow(dbc, pgno, ppg, c_data, pgs_donep)
 	DBC *dbc;
 	db_pgno_t pgno;
 	PAGE **ppg;
 	DB_COMPACT *c_data;
+	int *pgs_donep;
 {
 	DB *dbp;
 	DB_LOCK lock;
@@ -618,7 +635,7 @@ __db_truncate_overflow(dbc, pgno, ppg, c_data)
 			return (ret);
 		if (pgno <= c_data->compact_truncate)
 			continue;
-		if (have_lock == 0) {
+		if (!have_lock) {
 			DB_ASSERT(dbp->env, ppg != NULL);
 			ppgno = PGNO(*ppg);
 			if ((ret = __memp_fput(dbp->mpf, dbc->thread_info,
@@ -635,30 +652,32 @@ __db_truncate_overflow(dbc, pgno, ppg, c_data)
 			have_lock = 1;
 		}
 		if ((ret = __db_exchange_page(dbc,
-		    &page, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+		    &page, NULL, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
 			break;
 	}
 
 err:	if (page != NULL &&
-	    (t_ret = __memp_fput( dbp->mpf,
+	    (t_ret = __memp_fput(dbp->mpf,
 	    dbc->thread_info, page, dbc->priority)) != 0 && ret == 0)
 		ret = t_ret;
 	if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
 		ret = t_ret;
 	return (ret);
 }
+
 /*
  * __db_truncate_root -- swap a root page for a lower numbered page.
  * PUBLIC: int __db_truncate_root __P((DBC *,
- * PUBLIC:      PAGE *, u_int32_t, db_pgno_t *, u_int32_t));
+ * PUBLIC:      PAGE *, u_int32_t, db_pgno_t *, u_int32_t, int *));
  */
 int
-__db_truncate_root(dbc, ppg, indx, pgnop, tlen)
+__db_truncate_root(dbc, ppg, indx, pgnop, tlen, pgs_donep)
 	DBC *dbc;
 	PAGE *ppg;
 	u_int32_t indx;
 	db_pgno_t *pgnop;
 	u_int32_t tlen;
+	int *pgs_donep;
 {
 	DB *dbp;
 	DBT orig;
@@ -693,7 +712,7 @@ __db_truncate_root(dbc, ppg, indx, pgnop, tlen)
 	} else {
 		LOCK_CHECK_OFF(dbc->thread_info);
 		ret = __db_exchange_page(dbc,
-		    &page, NULL, PGNO_INVALID, DB_EXCH_FREE);
+		    &page, NULL, PGNO_INVALID, DB_EXCH_FREE, pgs_donep);
 		LOCK_CHECK_ON(dbc->thread_info);
 		if (ret != 0)
 			goto err;
@@ -705,8 +724,7 @@ __db_truncate_root(dbc, ppg, indx, pgnop, tlen)
 
 	/* Update the reference. */
 	if (DBC_LOGGING(dbc)) {
-		if ((ret = __db_pgno_log(dbp,
-		     dbc->txn, &LSN(ppg), 0, PGNO(ppg),
+		if ((ret = __db_pgno_log(dbp, dbc->txn, &LSN(ppg), 0, PGNO(ppg),
 		     &LSN(ppg), (u_int32_t)indx, *pgnop, newpgno)) != 0)
 			goto err;
 	} else
@@ -780,13 +798,13 @@ __db_find_free(dbc, type, size, bstart, freep)
 		goto err;
 
 	if (nelems == 0) {
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 	}
 
 	for (i = 0; i < nelems; i++) {
 		if (list[i] > bstart) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		start = i;
@@ -812,7 +830,7 @@ __db_find_free(dbc, type, size, bstart, freep)
 			goto found;
 		}
 	}
-	ret = DB_NOTFOUND;
+	ret = DBC_ERR(dbc, DB_NOTFOUND);
 	goto err;
 
 found:	/* We have size range of pages.  Remove them. */
@@ -1005,13 +1023,15 @@ err:	if (np != NULL && np != otherp)
  * __db_move_metadata -- move a meta data page to a lower page number.
  * The meta data page must be exclusively latched on entry.
  *
- * PUBLIC: int __db_move_metadata __P((DBC *, DBMETA **, DB_COMPACT *));
+ * PUBLIC: int __db_move_metadata
+ * PUBLIC:     __P((DBC *, DBMETA **, DB_COMPACT *, int *));
  */
 int
-__db_move_metadata(dbc, metap, c_data)
+__db_move_metadata(dbc, metap, c_data, pgs_donep)
 	DBC *dbc;
 	DBMETA **metap;
 	DB_COMPACT *c_data;
+	int *pgs_donep;
 {
 	BTREE *bt;
 	DB *dbp, *mdbp;
@@ -1023,7 +1043,7 @@ __db_move_metadata(dbc, metap, c_data)
 
 	c_data->compact_pages_examine++;
 	if ((ret = __db_exchange_page(dbc,
-	     (PAGE**)metap, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+	     (PAGE **)metap, NULL, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
 		return (ret);
 
 	if (PGNO(*metap) == dbp->meta_pgno)
diff --git a/src/db/db_conv.c b/src/db/db_conv.c
index 210b4d6e..77c6b760 100644
--- a/src/db/db_conv.c
+++ b/src/db/db_conv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -487,8 +487,12 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
 {
 	ENV *env;
 	BINTERNAL *bi;
+	BBLOB *bl;
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
+	HEAPBLOBHDR *bhdr;
+	HEAPHDR *hh;
+	HEAPSPLITHDR *hsh;
 	RINTERNAL *ri;
 	db_indx_t i, *inp, len, tmp;
 	u_int8_t *end, *p, *pgend;
@@ -500,8 +504,14 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
 		M_32_SWAP(h->lsn.file);
 		M_32_SWAP(h->lsn.offset);
 		M_32_SWAP(h->pgno);
-		M_32_SWAP(h->prev_pgno);
-		M_32_SWAP(h->next_pgno);
+		if (TYPE(h) == P_HEAP) {
+			M_32_SWAP(((HEAPPG *)h)->high_pgno);
+			M_16_SWAP(((HEAPPG *)h)->high_indx);
+			M_16_SWAP(((HEAPPG *)h)->free_indx);
+		} else {
+			M_32_SWAP(h->prev_pgno);
+			M_32_SWAP(h->next_pgno);
+		}
 		M_16_SWAP(h->entries);
 		M_16_SWAP(h->hf_offset);
 	}
@@ -527,6 +537,14 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
 				continue;
 
 			switch (HPAGE_TYPE(dbp, h, i)) {
+			case H_BLOB:
+				p = HBLOB_ID(P_ENTRY(dbp, h, i));
+				SWAP64(p);			/* id */
+				SWAP64(p);			/* size */
+				p = HBLOB_FILE_ID(P_ENTRY(dbp, h, i));
+				SWAP64(p);			/* file id */
+				SWAP64(p);			/* sdb id */
+				break;
 			case H_KEYDATA:
 				break;
 			case H_DUPLICATE:
@@ -599,6 +617,14 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
 			if ((u_int8_t *)bk >= pgend)
 				continue;
 			switch (B_TYPE(bk->type)) {
+			case B_BLOB:
+				bl = (BBLOB *)bk;
+				M_16_SWAP(bl->len);
+				M_64_SWAP(bl->id);		/* id */
+				M_64_SWAP(bl->size);		/* size */
+				M_64_SWAP(bl->file_id);		/* file id */
+				M_64_SWAP(bl->sdb_id);		/* sdb id */
+				break;
 			case B_KEYDATA:
 				M_16_SWAP(bk->len);
 				break;
@@ -663,6 +689,32 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
 		}
 		break;
 	case P_HEAP:
+		for (i = 0; i <= HEAP_HIGHINDX(h); i++) {
+			if (pgin)
+				M_16_SWAP(inp[i]);
+			if (inp[i] == 0)
+				continue;
+
+			hh = (HEAPHDR *)P_ENTRY(dbp, h, i);
+			if ((u_int8_t *)hh >= pgend)
+				continue;
+			M_16_SWAP(hh->size);
+			if (F_ISSET(hh, HEAP_RECSPLIT)) {
+				hsh = (HEAPSPLITHDR *)hh;
+				M_32_SWAP(hsh->tsize);
+				M_32_SWAP(hsh->nextpg);
+				M_16_SWAP(hsh->nextindx);
+			} else if (F_ISSET(hh, HEAP_RECBLOB)) {
+				bhdr = (HEAPBLOBHDR *)hh;
+				M_64_SWAP(bhdr->id);		/* id */
+				M_64_SWAP(bhdr->size);		/* size */
+				M_64_SWAP(bhdr->file_id);	/* file id */
+			}
+
+			if (!pgin)
+				M_16_SWAP(inp[i]);
+		}
+		break;
 	case P_IHEAP:
 	case P_INVALID:
 	case P_OVERFLOW:
@@ -678,8 +730,14 @@ out:	if (!pgin) {
 		M_32_SWAP(h->lsn.file);
 		M_32_SWAP(h->lsn.offset);
 		M_32_SWAP(h->pgno);
-		M_32_SWAP(h->prev_pgno);
-		M_32_SWAP(h->next_pgno);
+		if (TYPE(h) == P_HEAP) {
+			M_32_SWAP(((HEAPPG *)h)->high_pgno);
+			M_16_SWAP(((HEAPPG *)h)->high_indx);
+			M_16_SWAP(((HEAPPG *)h)->free_indx);
+		} else {
+			M_32_SWAP(h->prev_pgno);
+			M_32_SWAP(h->next_pgno);
+		}
 		M_16_SWAP(h->entries);
 		M_16_SWAP(h->hf_offset);
 	}
@@ -718,7 +776,10 @@ __db_pageswap(env, dbp, pp, len, pdata, pgin)
 
 	case P_HASHMETA:
 		return (__ham_mswap(env, pp));
-
+#ifdef HAVE_HEAP
+	case P_HEAPMETA:
+		return (__heap_mswap(env, pp));
+#endif
 	case P_QAMMETA:
 		return (__qam_mswap(env, pp));
 
@@ -794,12 +855,17 @@ __db_recordswap(op, size, hdr, data, pgin)
 	void *hdr, *data;
 	u_int32_t pgin;
 {
+	BBLOB *bl;
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
 	BINTERNAL *bi;
+	DBT *dbt;
+	HEAPHDR *hh;
+	HEAPBLOBHDR bhdr;
+	HEAPSPLITHDR *hsh;
 	RINTERNAL *ri;
 	db_indx_t tmp;
-	u_int8_t *p, *end;
+	u_int8_t buf[HEAPBLOBREC_SIZE], *end, *p;
 
 	if (size == 0)
 		return;
@@ -812,6 +878,14 @@ __db_recordswap(op, size, hdr, data, pgin)
 		case B_KEYDATA:
 			M_16_SWAP(bk->len);
 			break;
+		case B_BLOB:
+			bl = (BBLOB *)bk;
+			M_16_SWAP(bl->len);
+			M_64_SWAP(bl->id);		/* id */
+			M_64_SWAP(bl->size);		/* size */
+			M_64_SWAP(bl->file_id);		/* file id */
+			M_64_SWAP(bl->sdb_id);		/* sdb id */
+			break;
 		case B_DUPLICATE:
 		case B_OVERFLOW:
 			bo = (BOVERFLOW *)hdr;
@@ -835,6 +909,7 @@ __db_recordswap(op, size, hdr, data, pgin)
 			} else
 				bo = (BOVERFLOW *)data;
 			M_32_SWAP(bo->pgno);
+			M_32_SWAP(bo->tlen);
 		}
 		break;
 	case P_IRECNO:
@@ -867,10 +942,10 @@ __db_recordswap(op, size, hdr, data, pgin)
 				SWAP16(p);
 			}
 			break;
-		/* These two record types include the full header. */
+		/* These three record types include the full header. */
 		case H_OFFDUP:
 			p = (u_int8_t *)hdr;
-			p += SSZ(HOFFPAGE, pgno);
+			p += SSZ(HOFFDUP, pgno);
 			SWAP32(p);			/* pgno */
 			break;
 		case H_OFFPAGE:
@@ -879,11 +954,61 @@ __db_recordswap(op, size, hdr, data, pgin)
 			SWAP32(p);			/* pgno */
 			SWAP32(p);			/* tlen */
 			break;
+		case H_BLOB:
+			p = HBLOB_ID(hdr);
+			SWAP64(p);			/* id */
+			SWAP64(p);			/* size */
+			p = HBLOB_FILE_ID(hdr);
+			SWAP64(p);			/* file id */
+			SWAP64(p);			/* sdb id */
+			break;
 		default:
 			DB_ASSERT(NULL, op != op);
 		}
 		break;
-
+	case P_HEAP:
+		hh = (HEAPHDR *)hdr;
+		M_16_SWAP(hh->size);
+		if (F_ISSET(hh, HEAP_RECSPLIT)) {
+			hsh = (HEAPSPLITHDR *)hdr;
+			M_32_SWAP(hsh->tsize);
+			M_32_SWAP(hsh->nextpg);
+			M_16_SWAP(hsh->nextindx);
+		}else if (F_ISSET(hh, HEAP_RECBLOB)) {
+			/*
+			 * Heap blob records are broken into two parts when
+			 * logged, the shared header and the part that is
+			 * unique to blob records, which is stored in the
+			 * log data field.
+			 */
+			if (data != NULL) {
+				dbt = NULL;
+				if (pgin) {
+					dbt = data;
+					memcpy(buf + sizeof(HEAPHDR),
+					    dbt->data, HEAPBLOBREC_DSIZE);
+				} else {
+					memcpy(buf + sizeof(HEAPHDR),
+					    data, HEAPBLOBREC_DSIZE);
+				}
+				memcpy(&bhdr, buf, HEAPBLOBREC_SIZE);
+				M_64_SWAP(bhdr.id);		/* id */
+				M_64_SWAP(bhdr.size);		/* size */
+				M_64_SWAP(bhdr.file_id);	/* file id */
+				memcpy(buf, &bhdr, HEAPBLOBREC_SIZE);
+				if (pgin) {
+					memcpy(dbt->data,
+					    HEAPBLOBREC_DATA(buf),
+					    HEAPBLOBREC_DSIZE);
+				} else {
+					memcpy(data,
+					    HEAPBLOBREC_DATA(buf),
+					    HEAPBLOBREC_DSIZE);
+				}
+			}
+			break;
+		}
+		break;
 	default:
 		DB_ASSERT(NULL, op != op);
 	}
diff --git a/src/db/db_copy.c b/src/db/db_copy.c
index 359c74be..d9786702 100644
--- a/src/db/db_copy.c
+++ b/src/db/db_copy.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_dispatch.c b/src/db/db_dispatch.c
index 06de4ef7..7cb7f9ca 100644
--- a/src/db/db_dispatch.c
+++ b/src/db/db_dispatch.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
@@ -639,7 +639,7 @@ __db_txnlist_find(env, hp, txnid, statusp)
 	DB_TXNLIST *entry;
 
 	if (txnid == 0)
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 
 	return (__db_txnlist_find_internal(env, hp,
 	    TXNLIST_TXNID, txnid, &entry, 0, statusp));
@@ -666,7 +666,7 @@ __db_txnlist_update(env, hp, txnid, status, lsn, ret_status, add_ok)
 	int ret;
 
 	if (txnid == 0)
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 
 	ret = __db_txnlist_find_internal(env,
 	    hp, TXNLIST_TXNID, txnid, &elp, 0, ret_status);
@@ -715,7 +715,7 @@ __db_txnlist_find_internal(env,
 	ret = 0;
 
 	if (hp == NULL)
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 
 	switch (type) {
 	case TXNLIST_TXNID:
@@ -759,7 +759,7 @@ __db_txnlist_find_internal(env,
 		return (ret);
 	}
 
-	return (DB_NOTFOUND);
+	return (USR_ERR(env, DB_NOTFOUND));
 }
 
 /*
diff --git a/src/db/db_dup.c b/src/db/db_dup.c
index 9fd04791..e66ec92b 100644
--- a/src/db/db_dup.c
+++ b/src/db/db_dup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_iface.c b/src/db/db_iface.c
index 59e0ba53..da6140a4 100644
--- a/src/db/db_iface.c
+++ b/src/db/db_iface.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -159,9 +159,15 @@ __db_associate_arg(dbp, sdbp, callback, flags)
 
 	env = dbp->env;
 
+	if (dbp->blob_threshold || sdbp->blob_threshold) {
+		__db_errx(env, DB_STR("0751",
+		    "Secondary and primary databases cannot support blobs."));
+		return (EINVAL);
+	}
+
 	if (sdbp->type == DB_HEAP) {
-		__db_errx(env,
-		    "Heap databases may not be used as secondary databases");
+		__db_errx(env, DB_STR("0752",
+		    "Heap databases may not be used as secondary databases"));
 		return (EINVAL);
 	}
 
@@ -288,6 +294,7 @@ __db_cursor_pp(dbp, txn, dbcp, flags)
 	int rep_blocked, ret;
 
 	env = dbp->env;
+	(*dbcp) = NULL;
 
 	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
 
@@ -331,7 +338,8 @@ __db_cursor_pp(dbp, txn, dbcp, flags)
 	 * If a family transaction was passed in, the transaction handle in
 	 * the cursor may not match.
 	 */
-	txn = (*dbcp)->txn;
+	if ((*dbcp) != NULL)
+	    txn = (*dbcp)->txn;
 	if (txn != NULL && ret == 0)
 		TAILQ_INSERT_HEAD(&(txn->my_cursors), *dbcp, txn_cursors);
 
@@ -434,6 +442,13 @@ __db_cursor_arg(dbp, flags)
 			return (__db_fnl(env, "DB->cursor"));
 	}
 
+	if (dbp->blob_threshold &&
+	    LF_ISSET(DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT)) {
+		__db_errx(dbp->env, DB_STR("0753",
+"Blob enabled databases do not support READ_UNCOMMITTED and TXN_SNAPSHOT."));
+		return (EINVAL);
+	}
+
 	LF_CLR(DB_CURSOR_BULK |
 	    DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT);
 
@@ -828,6 +843,12 @@ __db_get_arg(dbp, key, data, flags)
 
 	env = dbp->env;
 
+	if (dbp->blob_threshold && LF_ISSET(DB_READ_UNCOMMITTED)) {
+		__db_errx(env, DB_STR("0754",
+	"Blob enabled databases do not support DB_READ_UNCOMMITTED."));
+		return (EINVAL);
+	}
+
 	/*
 	 * Check for read-modify-write validity.  DB_RMW doesn't make sense
 	 * with CDB cursors since if you're going to write the cursor, you
@@ -876,6 +897,9 @@ __db_get_arg(dbp, key, data, flags)
 		break;
 	case DB_CONSUME:
 	case DB_CONSUME_WAIT:
+		if (DB_IS_READONLY(dbp))
+			return (__db_rdonly(env,
+			    "DB->get CONSUME/CONSUME_WAIT"));
 		if (dirty) {
 			__db_errx(env, DB_STR_A("0583",
 		    "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT",
@@ -1148,6 +1172,13 @@ __db_open_pp(dbp, txn, fname, dname, type, flags, mode)
 	/* Save the current DB handle flags for refresh. */
 	dbp->orig_flags = dbp->flags;
 
+	if (fname == 0 && PREFMAS_IS_SET(env)) {
+		__db_errx(env, DB_STR("0783", "In-memory databases are not "
+		    "supported in Replication Manager preferred master mode"));
+		ret = EINVAL;
+		goto err;
+	}
+
 	/* Check for replication block. */
 	handle_check = IS_ENV_REPLICATED(env);
 	if (handle_check &&
@@ -1389,6 +1420,18 @@ __db_open_arg(dbp, txn, fname, dname, type, flags)
 		return (EINVAL);
 	}
 
+	if (LF_ISSET(DB_MULTIVERSION) && dbp->blob_threshold) {
+		__db_errx(env, DB_STR("0755",
+		    "DB_MULTIVERSION illegal with blob enabled databases"));
+		return (EINVAL);
+	}
+
+	if (LF_ISSET(DB_READ_UNCOMMITTED) && dbp->blob_threshold) {
+		__db_errx(env, DB_STR("0756",
+	"DB_READ_UNCOMMITTED illegal with blob enabled databases"));
+		return (EINVAL);
+	}
+
 	/* DB_TRUNCATE is neither transaction recoverable nor lockable. */
 	if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) {
 		__db_errx(env, DB_STR_A("0599",
@@ -1901,8 +1944,6 @@ __db_compact_pp(dbp, txn, start, stop, c_data, flags, end)
 		ret = __db_compact_int(dbp, ip,
 		    txn, start, stop, dp, flags, end);
 		break;
-	case DB_HEAP:
-		break;
 	default:
 		ret = __dbh_am_chk(dbp, DB_OK_BTREE);
 		break;
@@ -2893,7 +2934,7 @@ __dbt_ferr(dbp, name, dbt, check_thread)
 	 * database, without having to clear flags.
 	 */
 	if ((ret = __db_fchk(env, name, dbt->flags,
-	    DB_DBT_APPMALLOC | DB_DBT_BULK | DB_DBT_DUPOK |
+	    DB_DBT_APPMALLOC | DB_DBT_BLOB | DB_DBT_BULK | DB_DBT_DUPOK |
 	    DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY |
 	    DB_DBT_USERMEM | DB_DBT_PARTIAL | DB_DBT_READONLY)) != 0)
 		return (ret);
diff --git a/src/db/db_join.c b/src/db/db_join.c
index 751cf9e2..24d5260e 100644
--- a/src/db/db_join.c
+++ b/src/db/db_join.c
@@ -1,7 +1,7 @@
 /*
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -717,7 +717,6 @@ __db_join_close(dbc)
 	DBC *dbc;
 {
 	DB *dbp;
-	DB_THREAD_INFO *ip;
 	ENV *env;
 	JOIN_CURSOR *jc;
 	int ret, t_ret;
@@ -737,7 +736,6 @@ __db_join_close(dbc)
 	TAILQ_REMOVE(&dbp->join_queue, dbc, links);
 	MUTEX_UNLOCK(env, dbp->mutex);
 
-	ENV_ENTER(env, ip);
 	/*
 	 * Close any open scratch cursors.  In each case, there may
 	 * not be as many outstanding as there are cursors in
@@ -757,7 +755,6 @@ __db_join_close(dbc)
 		    (t_ret = __dbc_close(jc->j_fdupcurs[i])) != 0)
 			ret = t_ret;
 	}
-	ENV_LEAVE(env, ip);
 
 	__os_free(env, jc->j_exhausted);
 	__os_free(env, jc->j_curslist);
@@ -796,7 +793,7 @@ __db_join_getnext(dbc, key, data, exhausted, opmods)
 	int ret, cmp;
 	DB *dbp;
 	DBT ldata;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 
 	dbp = dbc->dbp;
 	func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
@@ -812,7 +809,7 @@ __db_join_getnext(dbc, key, data, exhausted, opmods)
 		if ((ret = __dbc_get(dbc,
 		    key, &ldata, opmods | DB_CURRENT)) != 0)
 			break;
-		cmp = func(dbp, data, &ldata);
+		cmp = func(dbp, data, &ldata, NULL);
 		if (cmp == 0) {
 			/*
 			 * We have to return the real data value.  Copy
diff --git a/src/db/db_meta.c b/src/db/db_meta.c
index 8f97ebd8..53cf77cc 100644
--- a/src/db/db_meta.c
+++ b/src/db/db_meta.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -939,12 +939,14 @@ done:	if (last_pgnop != NULL)
 		*last_pgnop = meta->last_pgno;
 
 	/*
-	 * The truncate point is the number of pages in the free
-	 * list back from the last page.  The number of pages
-	 * in the free list are the number that we can swap in.
-	 * Adjust it down slightly so if we find higher numbered
-	 * pages early and then free other pages later we can
-	 * truncate them.
+	 * Set the truncation point which determines which pages may be
+	 * relocated. Pages above are candidates to be swapped with a lower one
+	 * from the freelist by __db_exchange_page(); pages before the truncate
+	 * point are not relocated.
+	 * The truncation point starts as N pages less than the last_pgno, where
+	 * N is the size of the free list. This is reduced by 1/4 in the hope
+	 * that partially full pages will be coalesced together, creating
+	 * additional free pages during the compact.
 	 */
 	if (c_data) {
 		c_data->compact_truncate = (u_int32_t)meta->last_pgno - nelems;
diff --git a/src/db/db_method.c b/src/db/db_method.c
index 82d03e5f..d807bab6 100644
--- a/src/db/db_method.c
+++ b/src/db/db_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/crypto.h"
 #include "dbinc/db_page.h"
 #include "dbinc/btree.h"
@@ -36,14 +37,15 @@ static int  __db_set_alloc __P((DB *, void *(*)(size_t),
 static int  __db_get_append_recno __P((DB *,
 		int (**)(DB *, DBT *, db_recno_t)));
 static int  __db_set_append_recno __P((DB *, int (*)(DB *, DBT *, db_recno_t)));
+static int  __db_get_blob_dir __P((DB *, const char **));
+static int  __db_set_blob_dir __P((DB *, const char *));
+static int  __db_get_blob_sub_dir __P((DB *, const char **));
 static int  __db_get_cachesize __P((DB *, u_int32_t *, u_int32_t *, int *));
 static int  __db_set_cachesize __P((DB *, u_int32_t, u_int32_t, int));
 static int  __db_get_create_dir __P((DB *, const char **));
 static int  __db_set_create_dir __P((DB *, const char *));
 static int  __db_get_dup_compare
-		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
-static int  __db_set_dup_compare
-		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
 static int  __db_get_encrypt_flags __P((DB *, u_int32_t *));
 static int  __db_set_encrypt __P((DB *, const char *, u_int32_t));
 static int  __db_get_feedback __P((DB *, void (**)(DB *, int, int)));
@@ -90,6 +92,12 @@ db_create(dbpp, dbenv, flags)
 	ip = NULL;
 	env = dbenv == NULL ? NULL : dbenv->env;
 
+#ifdef HAVE_ERROR_HISTORY
+	/* Call thread local storage initializer at least once per process. */
+	if (env == NULL)
+		__db_thread_init();
+#endif
+
 	/* Check for invalid function flags. */
 	switch (flags) {
 	case 0:
@@ -206,12 +214,11 @@ __db_create_internal(dbpp, env, flags)
 err:	if (dbp != NULL) {
 		if (dbp->mpf != NULL)
 			(void)__memp_fclose(dbp->mpf, 0);
+		if (F_ISSET(env, ENV_DBLOCAL))
+			(void)__env_close(dbp->dbenv, 0);
 		__os_free(env, dbp);
 	}
 
-	if (dbp != NULL && F_ISSET(env, ENV_DBLOCAL))
-		(void)__env_close(dbp->dbenv, 0);
-
 	return (ret);
 }
 
@@ -225,6 +232,7 @@ __db_init(dbp, flags)
 	u_int32_t flags;
 {
 	int ret;
+	u_int32_t bytes;
 
 	dbp->locker = NULL;
 	dbp->alt_close = NULL;
@@ -254,6 +262,9 @@ __db_init(dbp, flags)
 	dbp->get_alloc = __db_get_alloc;
 	dbp->get_append_recno = __db_get_append_recno;
 	dbp->get_assoc_flags = __db_get_assoc_flags;
+	dbp->get_blob_dir = __db_get_blob_dir;
+	dbp->get_blob_sub_dir = __db_get_blob_sub_dir;
+	dbp->get_blob_threshold = __db_get_blob_threshold;
 	dbp->get_byteswapped = __db_get_byteswapped;
 	dbp->get_cachesize = __db_get_cachesize;
 	dbp->get_create_dir = __db_get_create_dir;
@@ -290,6 +301,8 @@ __db_init(dbp, flags)
 	dbp->rename = __db_rename_pp;
 	dbp->set_alloc = __db_set_alloc;
 	dbp->set_append_recno = __db_set_append_recno;
+	dbp->set_blob_dir = __db_set_blob_dir;
+	dbp->set_blob_threshold = __db_set_blob_threshold;
 	dbp->set_cachesize = __db_set_cachesize;
 	dbp->set_create_dir = __db_set_create_dir;
 	dbp->set_dup_compare = __db_set_dup_compare;
@@ -316,7 +329,11 @@ __db_init(dbp, flags)
 	dbp->verify = __db_verify_pp;
 	/* DB PUBLIC HANDLE LIST END */
 
-					/* Access method specific. */
+	if ((ret = __env_get_blob_threshold_int(dbp->env, &bytes)) != 0)
+		return (ret);
+	dbp->blob_threshold = bytes;
+
+	/* Access method specific. */
 	if ((ret = __bam_db_create(dbp)) != 0)
 		return (ret);
 	if ((ret = __ham_db_create(dbp)) != 0)
@@ -535,6 +552,182 @@ __db_set_append_recno(dbp, func)
 }
 
 /*
+ * __db_get_blob_threshold --
+ *	Get the current threshold size at which records are stored as blobs.
+ *
+ *  PUBLIC: int __db_get_blob_threshold __P((DB *, u_int32_t *));
+ */
+int
+__db_get_blob_threshold(dbp, bytes)
+	DB *dbp;
+	u_int32_t *bytes;
+{
+	/*
+	 * Copy the handle's threshold into *bytes.  While shared, this value
+	 * never changes after open, so no mutex protection is needed.
+	 */
+	*bytes = dbp->blob_threshold;
+
+	return (0);
+}
+
+/*
+ * __db_set_blob_threshold --
+ *	Set the threshold size at which records are stored as blobs rather
+ *	than in database items.  No flags are currently supported.
+ * PUBLIC: int __db_set_blob_threshold __P((DB *, u_int32_t, u_int32_t));
+ */
+int
+__db_set_blob_threshold(dbp, bytes, flags)
+	DB *dbp;
+	u_int32_t bytes;
+	u_int32_t flags;
+{
+	if (__db_fchk(dbp->env, "DB->set_blob_threshold", flags, 0) != 0)
+		return (EINVAL);
+	DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_blob_threshold");
+	/* Only a non-zero threshold can clash with the handle's settings. */
+	if (bytes != 0) {
+		if (F_ISSET(dbp, DB_AM_CHKSUM |
+		    DB_AM_ENCRYPT | DB_AM_DUP | DB_AM_DUPSORT)) {
+			__db_errx(dbp->env, DB_STR("0760",
+"Cannot enable blobs in databases with checksum, encryption, or duplicates."));
+			return (EINVAL);
+		}
+#ifdef HAVE_COMPRESSION
+		if (DB_IS_COMPRESSED(dbp)) {
+			__db_errx(dbp->env, DB_STR("0761",
+"Cannot enable blobs in databases with compression."));
+			return (EINVAL);
+		}
+#endif
+	}
+
+	dbp->blob_threshold = bytes;
+	return (0);
+}
+
+/*
+ * __db_blobs_enabled --
+ *	Return 1 if the database handle is configured so that it can
+ *	support blobs, 0 otherwise.
+ * PUBLIC: int __db_blobs_enabled __P((DB *));
+ */
+int
+__db_blobs_enabled(dbp)
+	DB *dbp;
+{
+	DB_ENV *dbenv;
+
+	/* A zero threshold means blobs are switched off. */
+	if (dbp->blob_threshold == 0)
+		return (0);
+	/*
+	 * Handle flags that rule blobs out: checksums and encryption are not
+	 * supported (that may change), duplicates would require comparing
+	 * blob data items, and an in-memory database has no place to put
+	 * blob files.
+	 */
+	if (F_ISSET(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT |
+	    DB_AM_DUP | DB_AM_DUPSORT | DB_AM_INMEM))
+		return (0);
+#ifdef HAVE_COMPRESSION
+	/* Compression is not supported either, but that may change. */
+	if (DB_IS_COMPRESSED(dbp))
+		return (0);
+#endif
+	/* Not when the environment has DB_ENV_TXN_SNAPSHOT set. */
+	dbenv = dbp->env->dbenv;
+	if (dbenv != NULL && F_ISSET(dbenv, DB_ENV_TXN_SNAPSHOT))
+		return (0);
+	/* Cannot support blobs in recno or queue. */
+	if (dbp->type == DB_RECNO || dbp->type == DB_QUEUE)
+		return (0);
+	/* BDB managed databases should not support blobs. */
+	if (dbp->fname != NULL && IS_DB_FILE(dbp->fname))
+		return (0);
+	if (dbp->dname != NULL && IS_DB_FILE(dbp->dname))
+		return (0);
+
+	return (1);
+}
+
+/*
+ * __db_get_blob_sub_dir --
+ *	Store in *dir the handle's blob_sub_dir: the subdirectory of the
+ *	blob directory in which the blob files for the given db are stored,
+ *	or NULL if there is none.  Guarded by DB_ILLEGAL_BEFORE_OPEN.
+ *	Returns 0 once the pointer has been stored.
+ */
+static int
+__db_get_blob_sub_dir(dbp, dir)
+	DB *dbp;
+	const char **dir;
+{
+	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_blob_sub_dir");
+
+	*dir = dbp->blob_sub_dir;
+
+	return (0);
+}
+
+/*
+ * __db_get_blob_dir --
+ *	Report the blob directory for this database: the environment's
+ *	db_blob_dir if set, else BLOB_DEFAULT_DIR when there is a db_home,
+ *	else NULL.  Always returns 0.
+ */
+static int
+__db_get_blob_dir(dbp, dir)
+	DB *dbp;
+	const char **dir;
+{
+	DB_ENV *dbenv;
+
+	dbenv = dbp->env->dbenv;
+
+	/* A handle without a DB_ENV has no blob directory to report. */
+	if (dbenv == NULL)
+		*dir = NULL;
+	else if (dbenv->db_blob_dir != NULL)
+		*dir = dbenv->db_blob_dir;
+	else if (dbp->env->db_home != NULL)
+		*dir = BLOB_DEFAULT_DIR;
+	else
+		*dir = NULL;
+
+	return (0);
+}
+
+/*
+ * __db_set_blob_dir --
+ *	Set the blob directory in a local environment: replace the DB_ENV's
+ *	db_blob_dir with a copy of dir.  Returns 0 if there is no DB_ENV.
+ */
+static int
+__db_set_blob_dir(dbp, dir)
+	DB *dbp;
+	const char *dir;
+{
+	DB_ENV *dbenv;
+	ENV *env;
+
+	DB_ILLEGAL_IN_ENV(dbp, "DB->set_blob_dir");
+	DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_blob_dir");
+
+	env = dbp->env;
+	if ((dbenv = env->dbenv) == NULL)
+		return (0);
+
+	/* Drop any earlier setting before installing the copy of dir. */
+	if (dbenv->db_blob_dir != NULL) {
+		__os_free(env, dbenv->db_blob_dir);
+		dbenv->db_blob_dir = NULL;
+	}
+	return (__os_strdup(env, dir, &dbenv->db_blob_dir));
+}
+
+/*
  * __db_get_cachesize --
  *	Get underlying cache size.
  */
@@ -607,7 +800,7 @@ __db_get_create_dir(dbp, dirp)
 static int
 __db_get_dup_compare(dbp, funcp)
 	DB *dbp;
-	int (**funcp) __P((DB *, const DBT *, const DBT *));
+	int (**funcp) __P((DB *, const DBT *, const DBT *, size_t *));
 {
 
 	DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH);
@@ -628,11 +821,14 @@ __db_get_dup_compare(dbp, funcp)
 /*
  * __db_set_dup_compare --
  *	Set duplicate comparison routine.
+ *
+ * PUBLIC: int __db_set_dup_compare __P((DB *,
+ * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, size_t *)));
  */
-static int
+int
 __db_set_dup_compare(dbp, func)
 	DB *dbp;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 {
 	int ret;
 
@@ -900,6 +1096,13 @@ __db_set_flags(dbp, flags)
 		ENV_REQUIRES_CONFIG(env,
 		    env->tx_handle, "DB_NOT_DURABLE", DB_INIT_TXN);
 
+	if (dbp->blob_threshold &&
+	    LF_ISSET(DB_CHKSUM | DB_ENCRYPT | DB_DUP | DB_DUPSORT)) {
+		__db_errx(dbp->env, DB_STR("0763",
+"Cannot enable checksum, encryption, or duplicates with blob support."));
+		return (EINVAL);
+	}
+
 	__db_map_flags(dbp, &flags, &dbp->flags);
 
 	if ((ret = __bam_set_flags(dbp, &flags)) != 0)
diff --git a/src/db/db_open.c b/src/db/db_open.c
index fefda48f..21074b15 100644
--- a/src/db/db_open.c
+++ b/src/db/db_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -119,6 +119,15 @@ __db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno)
 		goto err;
 
 	/*
+	 * Silently disable blobs in databases that cannot support them.
+	 * Most illegal configurations will have already been caught; this
+	 * is to allow a user to set an environment wide blob threshold, but
+	 * not have to explicitly turn it off for in-memory or queue databases.
+	 */
+	if (!__db_blobs_enabled(dbp))
+		dbp->blob_threshold = 0;
+
+	/*
 	 * If both fname and subname are NULL, it's always a create, so make
 	 * sure that we have both DB_CREATE and a type specified.  It would
 	 * be nice if this checking were done in __db_open where most of the
@@ -259,6 +268,11 @@ __db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno)
 	if (ret != 0)
 		goto err;
 
+	if (dbp->blob_file_id != 0)
+		if ((ret = __blob_make_sub_dir(env, &dbp->blob_sub_dir,
+		    dbp->blob_file_id, dbp->blob_sdb_id)) != 0)
+			goto err;
+
 #ifdef HAVE_PARTITION
 	if (dbp->p_internal != NULL && (ret =
 	    __partition_open(dbp, ip, txn, fname, type, flags, mode, 1)) != 0)
@@ -432,8 +446,10 @@ err:	return (ret);
 
 /*
  * __db_chk_meta --
- *	Take a buffer containing a meta-data page and check it for a valid LSN,
- *	checksum (and verify the checksum if necessary) and possibly decrypt it.
+ *	Validate a buffer containing a possible meta-data page.  The magic
+ *	number is checked in both byte orders; if it is valid, the LSN and
+ *	checksum (if necessary) are validated and the page is possibly
+ *	decrypted.
  *
  *	Return 0 on success, >0 (errno).
  *
@@ -447,44 +463,64 @@ __db_chk_meta(env, dbp, meta, flags)
 	u_int32_t flags;
 {
 	DB_LSN swap_lsn;
-	int is_hmac, ret, swapped;
-	u_int32_t magic, orig_chk;
+	int is_hmac, needs_swap, ret;
+	u_int32_t magic;
 	u_int8_t *chksum;
 
 	ret = 0;
-	swapped = 0;
+	needs_swap = 0;
 
+	/*
+	 * We can verify that this is some kind of db now, before any potential
+	 * decryption, because the first P_OVERHEAD() bytes of most pages are
+	 * cleartext. This gets called both before and after swapping, so we
+	 * need to check for byte swapping ourselves.
+	 */
+	magic = meta->magic;
+magic_retry:
+	switch (magic) {
+	case DB_BTREEMAGIC:
+	case DB_HASHMAGIC:
+	case DB_HEAPMAGIC:
+	case DB_QAMMAGIC:
+	case DB_RENAMEMAGIC:
+		break;
+	default:
+		if (needs_swap)
+			/* It's already been swapped, so it isn't a BDB file. */
+			return (EINVAL);
+		M_32_SWAP(magic);
+		needs_swap = 1;
+		goto magic_retry;
+	}
+
+	if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) {
+		swap_lsn = meta->lsn;
+		if (needs_swap) {
+			M_32_SWAP(swap_lsn.file);
+			M_32_SWAP(swap_lsn.offset);
+		}
+		if (!IS_REP_CLIENT(env) && !IS_NOT_LOGGED_LSN(swap_lsn) &&
+		    !IS_ZERO_LSN(swap_lsn) && (ret =
+		    __log_check_page_lsn(env, dbp, &swap_lsn)) != 0)
+			return (ret);
+	}
 	if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) {
 		if (dbp != NULL)
 			F_SET(dbp, DB_AM_CHKSUM);
-
-		is_hmac = meta->encrypt_alg == 0 ? 0 : 1;
-		chksum = ((BTMETA *)meta)->chksum;
-
-		/*
-		 * If we need to swap, the checksum function overwrites the
-		 * original checksum with 0, so we need to save a copy of the
-		 * original for swapping later.
-		 */
-		orig_chk = *(u_int32_t *)chksum;
-
 		/*
 		 * We cannot add this to __db_metaswap because that gets done
 		 * later after we've verified the checksum or decrypted.
 		 */
 		if (LF_ISSET(DB_CHK_META)) {
-			swapped = 0;
-chk_retry:		if ((ret =
+			is_hmac = meta->encrypt_alg != 0;
+			chksum = ((BTMETA *)meta)->chksum;
+			if (needs_swap && !is_hmac)
+				M_32_SWAP(*(u_int32_t *)chksum);
+			if ((ret =
 			    __db_check_chksum(env, NULL, env->crypto_handle,
-			    chksum, meta, DBMETASIZE, is_hmac)) != 0) {
-				if (is_hmac || swapped)
-					return (DB_CHKSUM_FAIL);
-
-				M_32_SWAP(orig_chk);
-				swapped = 1;
-				*(u_int32_t *)chksum = orig_chk;
-				goto chk_retry;
-			}
+			    chksum, meta, DBMETASIZE, is_hmac)) != 0)
+				return (DB_CHKSUM_FAIL);
 		}
 	} else if (dbp != NULL)
 		F_CLR(dbp, DB_AM_CHKSUM);
@@ -492,44 +528,8 @@ chk_retry:		if ((ret =
 #ifdef HAVE_CRYPTO
 	if (__crypto_decrypt_meta(env,
 	     dbp, (u_int8_t *)meta, LF_ISSET(DB_CHK_META)) != 0)
-	     	ret = DB_CHKSUM_FAIL;
-	else
+		ret = DB_CHKSUM_FAIL;
 #endif
-
-	/* Now that we're decrypted, we can check LSN. */
-	if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) {
-		/*
-		 * This gets called both before and after swapping, so we
-		 * need to check ourselves.  If we already swapped it above,
-		 * we'll know that here.
-		 */
-
-		swap_lsn = meta->lsn;
-		magic = meta->magic;
-lsn_retry:
-		if (swapped) {
-			M_32_SWAP(swap_lsn.file);
-			M_32_SWAP(swap_lsn.offset);
-			M_32_SWAP(magic);
-		}
-		switch (magic) {
-		case DB_BTREEMAGIC:
-		case DB_HASHMAGIC:
-		case DB_HEAPMAGIC:
-		case DB_QAMMAGIC:
-		case DB_RENAMEMAGIC:
-			break;
-		default:
-			if (swapped)
-				return (EINVAL);
-			swapped = 1;
-			goto lsn_retry;
-		}
-		if (!IS_REP_CLIENT(env) &&
-		    !IS_NOT_LOGGED_LSN(swap_lsn) && !IS_ZERO_LSN(swap_lsn))
-			/* Need to do check. */
-			ret = __log_check_page_lsn(env, dbp, &swap_lsn);
-	}
 	return (ret);
 }
 
@@ -598,7 +598,6 @@ swap_retry:
 	}
 
 	/*
-	 * We can only check the meta page if we are sure we have a meta page.
 	 * If it is random data, then this check can fail.  So only now can we
 	 * checksum and decrypt.  Don't distinguish between configuration and
 	 * checksum match errors here, because we haven't opened the database
@@ -606,9 +605,9 @@ swap_retry:
 	 * If DB_SKIP_CHK is set, it means the checksum was already checked
 	 * and the page was already decrypted.
 	 */
-	if (!LF_ISSET(DB_SKIP_CHK) && 
+	if (!LF_ISSET(DB_SKIP_CHK) &&
 	    (ret = __db_chk_meta(env, dbp, meta, flags)) != 0) {
-		if (ret == DB_CHKSUM_FAIL) 
+		if (ret == DB_CHKSUM_FAIL)
 			__db_errx(env, DB_STR_A("0640",
 			    "%s: metadata page checksum error", "%s"), name);
 		goto bad_format;
@@ -669,10 +668,9 @@ swap_retry:
 	}
 
 	if (FLD_ISSET(meta->metaflags,
-	    DBMETA_PART_RANGE | DBMETA_PART_CALLBACK))
-		if ((ret =
-		    __partition_init(dbp, meta->metaflags)) != 0)
-			return (ret);
+	    DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
+	    (ret = __partition_init(dbp, meta->metaflags)) != 0)
+		return (ret);
 	return (0);
 
 bad_format:
diff --git a/src/db/db_overflow.c b/src/db/db_overflow.c
index d992ec0d..22f349ed 100644
--- a/src/db/db_overflow.c
+++ b/src/db/db_overflow.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -58,39 +58,26 @@
  */
 
 /*
- * __db_goff --
- *	Get an offpage item.
+ * __db_alloc_dbt
  *
- * PUBLIC: int __db_goff __P((DBC *,
- * PUBLIC:     DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *));
+ *	Allocate enough space in the dbt to hold the data. Also used by the
+ *	blob file API.
+ *
+ * PUBLIC: int __db_alloc_dbt __P((ENV *, DBT *, u_int32_t, u_int32_t *,
+ * PUBLIC:	u_int32_t *, void **, u_int32_t *));
  */
 int
-__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
-	DBC *dbc;
+__db_alloc_dbt(env, dbt, tlen, nd, st, bpp, bpsz)
+	ENV *env;
 	DBT *dbt;
 	u_int32_t tlen;
-	db_pgno_t pgno;
+	u_int32_t *nd;
+	u_int32_t *st;
 	void **bpp;
 	u_int32_t *bpsz;
 {
-	DB *dbp;
-	DB_MPOOLFILE *mpf;
-	DB_TXN *txn;
-	DBC_INTERNAL *cp;
-	ENV *env;
-	PAGE *h;
-	DB_THREAD_INFO *ip;
-	db_indx_t bytes;
-	u_int32_t curoff, needed, start;
-	u_int8_t *p, *src;
 	int ret;
-
-	dbp = dbc->dbp;
-	cp = dbc->internal;
-	env = dbp->env;
-	ip = dbc->thread_info;
-	mpf = dbp->mpf;
-	txn = dbc->txn;
+	u_int32_t needed, start;
 
 	/*
 	 * Check if the buffer is big enough; if it is not and we are
@@ -110,6 +97,8 @@ __db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
 		start = 0;
 		needed = tlen;
 	}
+	*nd = needed;
+	*st = start;
 
 	/*
 	 * If the caller has not requested any data, return success. This
@@ -123,7 +112,7 @@ __db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
 	}
 
 	if (F_ISSET(dbt, DB_DBT_USERCOPY))
-		goto skip_alloc;
+		return (0);
 
 	/* Allocate any necessary memory. */
 	if (F_ISSET(dbt, DB_DBT_USERMEM)) {
@@ -152,7 +141,48 @@ __db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
 		return (DB_BUFFER_SMALL);
 	}
 
-skip_alloc:
+	return (0);
+}
+
+/*
+ * __db_goff --
+ *	Get an offpage item.
+ *
+ * PUBLIC: int __db_goff __P((DBC *,
+ * PUBLIC:     DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *));
+ */
+int
+__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
+	DBC *dbc;
+	DBT *dbt;
+	u_int32_t tlen;
+	db_pgno_t pgno;
+	void **bpp;
+	u_int32_t *bpsz;
+{
+	DB *dbp;
+	DB_MPOOLFILE *mpf;
+	DB_TXN *txn;
+	DBC_INTERNAL *cp;
+	ENV *env;
+	PAGE *h;
+	DB_THREAD_INFO *ip;
+	db_indx_t bytes;
+	u_int32_t curoff, needed, start;
+	u_int8_t *p, *src;
+	int ret;
+
+	dbp = dbc->dbp;
+	cp = dbc->internal;
+	env = dbp->env;
+	ip = dbc->thread_info;
+	mpf = dbp->mpf;
+	txn = dbc->txn;
+
+	if (((ret = __db_alloc_dbt(
+	    env, dbt, tlen, &needed, &start, bpp, bpsz)) != 0) || needed == 0)
+		return (ret);
+
 	/* Set up a start page in the overflow chain if streaming. */
 	if (cp->stream_start_pgno != PGNO_INVALID &&
 	    pgno == cp->stream_start_pgno && start >= cp->stream_off &&
@@ -485,28 +515,33 @@ __db_doff(dbc, pgno)
 
 /*
  * __db_moff --
- *	Match on overflow pages.
+ *	Match on overflow pages from a specific offset.
  *
- * Given a starting page number and a key, return <0, 0, >0 to indicate if the
- * key on the page is less than, equal to or greater than the key specified.
- * We optimize this by doing chunk at a time comparison unless the user has
- * specified a comparison function.  In this case, we need to materialize
- * the entire object and call their comparison routine.
+ * Given a starting page number and a key, store <0, 0, >0 in 'cmpp' to indicate
+ * if the key on the page is less than, equal to or greater than the key
+ * specified. We optimize this by doing a chunk at a time comparison unless the
+ * user has specified a comparison function. In this case, we need to
+ * materialize the entire object and call their comparison routine.
+ *
+ * We start the comparison at an offset and update the offset with the
+ * longest matching count after the comparison.
  *
  * __db_moff and __db_coff are generic functions useful in searching and
  * ordering off page items. __db_moff matches an overflow DBT with an offpage
  * item. __db_coff compares two offpage items for lexicographic sort order.
  *
  * PUBLIC: int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t,
- * PUBLIC:     int (*)(DB *, const DBT *, const DBT *), int *));
+ * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, size_t *),
+ * PUBLIC:     int *, size_t *));
  */
 int
-__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
+__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp, locp)
 	DBC *dbc;
 	const DBT *dbt;
 	db_pgno_t pgno;
 	u_int32_t tlen;
-	int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp;
+	int (*cmpfunc) __P((DB *, const DBT *, const DBT *, size_t *)), *cmpp;
+	size_t *locp;
 {
 	DB *dbp;
 	DBT local_dbt;
@@ -517,6 +552,7 @@ __db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
 	u_int32_t bufsize, cmp_bytes, key_left;
 	u_int8_t *p1, *p2;
 	int ret;
+	size_t pos, start;
 
 	dbp = dbc->dbp;
 	ip = dbc->thread_info;
@@ -535,39 +571,76 @@ __db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
 		    &local_dbt, tlen, pgno, &buf, &bufsize)) != 0)
 			return (ret);
 		/* Pass the key as the first argument */
-		*cmpp = cmpfunc(dbp, dbt, &local_dbt);
+		*cmpp = cmpfunc(dbp, dbt, &local_dbt, NULL);
 		__os_free(dbp->env, buf);
 		return (0);
 	}
 
+	/*
+	 * We start the comparison from the location of 'locp' and store the
+	 * last matching location into 'locp'.
+	 */
+	start = (locp == NULL ? 0 : *locp);
+	pos = 0;
+
+	/* Subtract prefix length from lengths. */
+	tlen -= (u_int32_t)start;
+	key_left = dbt->size - (u_int32_t)start;
+	p1 = (u_int8_t *)dbt->data + start;
+
 	/* While there are both keys to compare. */
-	for (*cmpp = 0, p1 = dbt->data,
-	    key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
+	for (*cmpp = 0; key_left > 0 &&
+	    tlen > 0 && pgno != PGNO_INVALID;) {
 		if ((ret =
 		    __memp_fget(mpf, &pgno, ip, dbc->txn, 0, &pagep)) != 0)
 			return (ret);
 
-		cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
-		tlen -= cmp_bytes;
-		key_left -= cmp_bytes;
-		for (p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp);
-		    cmp_bytes-- > 0; ++p1, ++p2)
-			if (*p1 != *p2) {
-				*cmpp = (long)*p1 - (long)*p2;
-				break;
+		/*
+		 * Figure out where to start comparison, and how many
+		 * bytes to compare.
+		 */
+		if (pos >= start) {
+			p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp);
+			cmp_bytes = OV_LEN(pagep);
+		} else if (pos + OV_LEN(pagep) > start) {
+			p2 = (u_int8_t *)pagep +
+			    P_OVERHEAD(dbp) + (start - pos);
+			cmp_bytes = OV_LEN(pagep) - (u_int32_t)(start - pos);
+		} else {
+			p2 = NULL;
+			cmp_bytes = 0;
+		}
+
+		pos += OV_LEN(pagep);
+
+		if (cmp_bytes != 0) {
+			if (cmp_bytes > key_left)
+				cmp_bytes = key_left;
+			tlen -= cmp_bytes;
+			key_left -= cmp_bytes;
+			for (;cmp_bytes-- > 0; ++p1, ++p2) {
+				if (*p1 != *p2) {
+					*cmpp = (long)*p1 - (long)*p2;
+					break;
+				}
+				if (locp != NULL)
+					++(*locp);
 			}
+
+		}
 		pgno = NEXT_PGNO(pagep);
 		if ((ret = __memp_fput(mpf, ip, pagep, dbp->priority)) != 0)
 			return (ret);
 		if (*cmpp != 0)
 			return (0);
 	}
-	if (key_left > 0)		/* DBT is longer than the page key. */
-		*cmpp = 1;
-	else if (tlen > 0)		/* DBT is shorter than the page key. */
-		*cmpp = -1;
-	else
-		*cmpp = 0;
+
+	if (*cmpp == 0) {
+		if (key_left > 0) /* DBT is longer than the page key. */
+			*cmpp = 1;
+		else if (tlen > 0) /* DBT is shorter than the page key. */
+			*cmpp = -1;
+	}
 
 	return (0);
 }
@@ -587,13 +660,13 @@ __db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
  * DBT type.
  *
  * PUBLIC: int __db_coff __P((DBC *, const DBT *, const DBT *,
- * PUBLIC:     int (*)(DB *, const DBT *, const DBT *), int *));
+ * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, size_t *), int *));
  */
 int
 __db_coff(dbc, dbt, match, cmpfunc, cmpp)
 	DBC *dbc;
 	const DBT *dbt, *match;
-	int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp;
+	int (*cmpfunc) __P((DB *, const DBT *, const DBT *, size_t *)), *cmpp;
 {
 	DB *dbp;
 	DB_THREAD_INFO *ip;
@@ -643,7 +716,7 @@ __db_coff(dbc, dbt, match, cmpfunc, cmpp)
 		    match_pgno, &match_buf, &match_bufsz)) != 0)
 			goto err1;
 		/* The key needs to be the first argument for sort order */
-		*cmpp = cmpfunc(dbp, &local_key, &local_match);
+		*cmpp = cmpfunc(dbp, &local_key, &local_match, NULL);
 
 err1:		if (dbt_buf != NULL)
 			__os_free(dbp->env, dbt_buf);
@@ -657,6 +730,7 @@ err1:		if (dbt_buf != NULL)
 		if ((ret =
 		    __memp_fget(mpf, &dbt_pgno, ip, txn, 0, &dbt_pagep)) != 0)
 			return (ret);
+		DB_ASSERT(dbc->env, TYPE(dbt_pagep) == P_OVERFLOW);
 		if ((ret =
 		    __memp_fget(mpf, &match_pgno,
 			ip, txn, 0, &match_pagep)) != 0) {
@@ -664,6 +738,7 @@ err1:		if (dbt_buf != NULL)
 			    mpf, ip, dbt_pagep, DB_PRIORITY_UNCHANGED);
 			return (ret);
 		}
+		DB_ASSERT(dbc->env, TYPE(match_pagep) == P_OVERFLOW);
 		cmp_bytes = page_space < max_data ? page_space : max_data;
 		for (p1 = (u_int8_t *)dbt_pagep + P_OVERHEAD(dbp),
 		    p2 = (u_int8_t *)match_pagep + P_OVERHEAD(dbp);
diff --git a/src/db/db_ovfl_vrfy.c b/src/db/db_ovfl_vrfy.c
index fa630f7b..55eb2b70 100644
--- a/src/db/db_ovfl_vrfy.c
+++ b/src/db/db_ovfl_vrfy.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
diff --git a/src/db/db_pr.c b/src/db/db_pr.c
index d95440f9..4933498e 100644
--- a/src/db/db_pr.c
+++ b/src/db/db_pr.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -11,6 +11,7 @@
 #include "db_int.h"
 #include "dbinc/db_page.h"
 #include "dbinc/btree.h"
+#include "dbinc/fop.h"
 #include "dbinc/hash.h"
 #include "dbinc/heap.h"
 #include "dbinc/mp.h"
@@ -25,6 +26,11 @@ static int	 __db_hmeta __P((ENV *, DB *, HMETA *, u_int32_t));
 static void	 __db_meta __P((ENV *, DB *, DBMETA *, FN const *, u_int32_t));
 static void	 __db_proff __P((ENV *, DB_MSGBUF *, void *));
 static int	 __db_qmeta __P((ENV *, DB *, QMETA *, u_int32_t));
+static int	 __db_prblob __P((DBC *, DBT *, DBT *, int, const char *,
+    void *, int (*callback) __P((void *, const void *)), int, int));
+static int	 __db_prblob_id __P((DB *, db_seq_t,
+		    off_t, DBT *, int, const char *, void *,
+		    int (*callback) __P((void *, const void *))));
 #ifdef HAVE_STATISTICS
 static void	 __db_prdb __P((DB *, u_int32_t));
 static int	 __db_prtree __P((DB *, DB_TXN *,
@@ -515,6 +521,11 @@ __db_bmeta(env, dbp, h, flags)
 		__db_msg(env, "\tre_len: %#lx re_pad: %#lx",
 		    (u_long)h->re_len, (u_long)h->re_pad);
 	__db_msg(env, "\troot: %lu", (u_long)h->root);
+	__db_msg(env, "\tblob_threshold: %lu", (u_long)h->blob_threshold);
+	__db_msg(env, "\tblob_file_lo: %lu", (u_long)h->blob_file_lo);
+	__db_msg(env, "\tblob_file_hi: %lu", (u_long)h->blob_file_hi);
+	__db_msg(env, "\tblob_sdb_lo: %lu", (u_long)h->blob_sdb_lo);
+	__db_msg(env, "\tblob_sdb_hi: %lu", (u_long)h->blob_sdb_hi);
 
 	return (0);
 }
@@ -549,6 +560,11 @@ __db_hmeta(env, dbp, h, flags)
 	__db_msg(env, "\tffactor: %lu", (u_long)h->ffactor);
 	__db_msg(env, "\tnelem: %lu", (u_long)h->nelem);
 	__db_msg(env, "\th_charkey: %#lx", (u_long)h->h_charkey);
+	__db_msg(env, "\tblob_threshold: %lu", (u_long)h->blob_threshold);
+	__db_msg(env, "\tblob_file_lo: %lu", (u_long)h->blob_file_lo);
+	__db_msg(env, "\tblob_file_hi: %lu", (u_long)h->blob_file_hi);
+	__db_msg(env, "\tblob_sdb_lo: %lu", (u_long)h->blob_sdb_lo);
+	__db_msg(env, "\tblob_sdb_hi: %lu", (u_long)h->blob_sdb_hi);
 	__db_msgadd(env, &mb, "\tspare points:\n\t");
 	for (i = 0; i < NCACHED; i++) {
 		__db_msgadd(env, &mb, "%lu (%lu) ", (u_long)h->spares[i],
@@ -604,6 +620,9 @@ __db_heapmeta(env, dbp, h, flags)
 	__db_msg(env, "\tnregions: %lu", (u_long)h->nregions);
 	__db_msg(env, "\tgbytes: %lu", (u_long)h->gbytes);
 	__db_msg(env, "\tbytes: %lu", (u_long)h->bytes);
+	__db_msg(env, "\tblob_threshold: %lu", (u_long)h->blob_threshold);
+	__db_msg(env, "\tblob_file_lo: %lu", (u_long)h->blob_file_lo);
+	__db_msg(env, "\tblob_file_hi: %lu", (u_long)h->blob_file_hi);
 
 	return (0);
 }
@@ -682,14 +701,19 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
 {
 	BINTERNAL *bi;
 	BKEYDATA *bk;
+	BBLOB bl;
 	HOFFPAGE a_hkd;
+	HBLOB hblob;
 	QAMDATA *qp, *qep;
 	RINTERNAL *ri;
 	HEAPHDR *hh;
 	HEAPSPLITHDR *hs;
+	HEAPBLOBHDR bhdr;
 	db_indx_t dlen, len, i, *inp, max;
 	db_pgno_t pgno;
 	db_recno_t recno;
+	off_t blob_size;
+	db_seq_t blob_id;
 	u_int32_t qlen;
 	u_int8_t *ep, *hk, *p;
 	int deleted, ret;
@@ -899,6 +923,23 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
 				    (u_long)a_hkd.tlen, (u_long)a_hkd.pgno);
 				DB_MSGBUF_FLUSH(env, mbp);
 				break;
+			case H_BLOB:
+				memcpy(&hblob, hk, HBLOB_SIZE);
+				blob_id = (db_seq_t)hblob.id;
+				__db_msgadd(env, mbp, "blob: id: %llu ",
+				    (long long)blob_id);
+				GET_BLOB_SIZE(env, hblob, blob_size, ret);
+				if (ret != 0)
+					__db_msgadd(env, mbp,
+					    "blob: blob_size overflow. ");
+				__db_msgadd(env, mbp, "blob: size: %llu",
+				    (long long)blob_size);
+				/*
+				 * No point printing the blob file, it is
+				 * likely not readable by humans.
+				 */
+				DB_MSGBUF_FLUSH(env, mbp);
+				break;
 			default:
 				DB_MSGBUF_FLUSH(env, mbp);
 				__db_msg(env, "ILLEGAL HASH PAGE TYPE: %lu",
@@ -925,6 +966,7 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
 				__db_proff(env, mbp, bi->data);
 				break;
 			default:
+				/* B_BLOB does not appear on internal pages. */
 				DB_MSGBUF_FLUSH(env, mbp);
 				__db_msg(env, "ILLEGAL BINTERNAL TYPE: %lu",
 				    (u_long)B_TYPE(bi->type));
@@ -950,6 +992,19 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
 			case B_OVERFLOW:
 				__db_proff(env, mbp, bk);
 				break;
+			case B_BLOB:
+				memcpy(&bl, bk, BBLOB_SIZE);
+				blob_id = (db_seq_t)bl.id;
+				__db_msgadd(env, mbp, "blob: id: %llu ",
+				    (long long)blob_id);
+				GET_BLOB_SIZE(env, bl, blob_size, ret);
+				if (ret != 0)
+					__db_msgadd(env, mbp,
+					    "blob: blob_size overflow. ");
+				__db_msgadd(env, mbp, "blob: size: %llu",
+				    (long long)blob_size);
+				DB_MSGBUF_FLUSH(env, mbp);
+				break;
 			default:
 				DB_MSGBUF_FLUSH(env, mbp);
 				__db_msg(env,
@@ -961,9 +1016,27 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
 			break;
 		case P_HEAP:
 			hh = sp;
-			if (!F_ISSET(hh,HEAP_RECSPLIT))
+			if (!F_ISSET(hh,HEAP_RECSPLIT) &&
+			    !F_ISSET(hh, HEAP_RECBLOB))
 				hdata = (u_int8_t *)hh + sizeof(HEAPHDR);
-			else {
+			else if (F_ISSET(hh, HEAP_RECBLOB)) {
+				memcpy(&bhdr, hh, HEAPBLOBREC_SIZE);
+				blob_id = (db_seq_t)bhdr.id;
+				__db_msgadd(env, mbp, "blob: id: %llu ",
+				    (long long)blob_id);
+				GET_BLOB_SIZE(env, bhdr, blob_size, ret);
+				if (ret != 0)
+					__db_msgadd(env, mbp,
+					    "blob: blob_size overflow. ");
+				__db_msgadd(env, mbp, "blob: size: %llu",
+				    (long long)blob_size);
+				/*
+				 * No point printing the blob file, it is
+				 * likely not readable by humans.
+				 */
+				DB_MSGBUF_FLUSH(env, mbp);
+				break;
+			} else {
 				hs = sp;
 				__db_msgadd(env, mbp,
 				     "split: 0x%02x tsize: %lu next: %lu.%lu ",
@@ -1276,10 +1349,16 @@ __db_dump(dbp, subname, callback, handle, pflag, keyflag)
 	ENV *env;
 	db_recno_t recno;
 	int is_recno, is_heap, ret, t_ret;
+	u_int32_t blob_threshold;
 	void *pointer;
 
 	env = dbp->env;
 	is_heap = 0;
+	memset(&dataret, 0, sizeof(DBT));
+	memset(&keyret, 0, sizeof(DBT));
+
+	if ((ret = __db_get_blob_threshold(dbp, &blob_threshold)) != 0)
+		return (ret);
 
 	if ((ret = __db_prheader(
 	    dbp, subname, pflag, keyflag, handle, callback, NULL, 0)) != 0)
@@ -1317,8 +1396,8 @@ retry: while ((ret =
 	    !is_heap ? DB_NEXT | DB_MULTIPLE_KEY : DB_NEXT )) == 0) {
 		if (is_heap) {
 			/* Never dump keys for HEAP */
-			if ((ret = __db_prdbt(
-			    &data, pflag, " ", handle, callback, 0, 0)) != 0)
+			if ((ret = __db_prdbt(&data,
+			    pflag, " ", handle, callback, 0, 0, 0)) != 0)
 				goto err;
 			continue;
 		}
@@ -1337,17 +1416,24 @@ retry: while ((ret =
 
 			if ((keyflag &&
 			    (ret = __db_prdbt(&keyret, pflag, " ",
-			    handle, callback, is_recno, 0)) != 0) ||
+			    handle, callback, is_recno, 0, 0)) != 0) ||
 			    (ret = __db_prdbt(&dataret, pflag, " ",
-			    handle, callback, 0, 0)) != 0)
+			    handle, callback, 0, 0, 0)) != 0)
 					goto err;
 		}
 	}
 	if (ret == DB_BUFFER_SMALL) {
-		data.size = (u_int32_t)DB_ALIGN(data.size, 1024);
-		if ((ret = __os_realloc(env, data.size, &data.data)) != 0)
-			goto err;
-		data.ulen = data.size;
+		if (blob_threshold != 0 && data.size >= blob_threshold) {
+			if ((ret = __db_prblob(dbcp, &key, &data, pflag,
+			    " ", handle, callback, is_heap, keyflag)) != 0)
+				goto err;
+		} else {
+			data.size = (u_int32_t)DB_ALIGN(data.size, 1024);
+			if ((ret = __os_realloc(
+			    env, data.size, &data.data)) != 0)
+				goto err;
+			data.ulen = data.size;
+		}
 		goto retry;
 	}
 	if (ret == DB_NOTFOUND)
@@ -1365,14 +1451,153 @@ err:	if ((t_ret = __dbc_close(dbcp)) != 0 && ret == 0)
 }
 
 /*
+ * __db_prblob
+ *	Print a blob file.
+ */
+static int
+__db_prblob(dbc, key, data, checkprint,
+    prefix, handle, callback, is_heap, keyflag)
+	DBC *dbc;
+	DBT *key;
+	DBT *data;
+	int checkprint;
+	const char *prefix;
+	void *handle;
+	int (*callback) __P((void *, const void *));
+	int is_heap;
+	int keyflag;
+{
+	DBC *local;
+	DBT partial;
+	int ret, t_ret;
+	off_t blob_size;
+	db_seq_t blob_id;
+
+	local = NULL;
+	memset(&partial, 0, sizeof(DBT));
+	partial.flags = DB_DBT_PARTIAL;
+
+	if ((ret = __dbc_idup(dbc, &local, DB_POSITION)) != 0)
+		goto err;
+
+	/* Move the cursor to the blob. */
+	if ((ret = __dbc_get(local, key, &partial, DB_NEXT)) != 0)
+		return (ret);
+
+	if ((ret = __dbc_get_blob_id(local, &blob_id)) != 0) {
+		/*
+		 * It is possible this is not a blob.  Non-blob items that are
+		 * larger than the blob threshold can exist if the item was
+		 * smaller than the threshold when created, then later updated
+		 * to larger than the threshold value.
+		 */
+		if (ret == EINVAL) {
+			ret = 0;
+			data->size = (u_int32_t)DB_ALIGN(data->size, 1024);
+			if ((ret = __os_realloc(
+			    dbc->env, data->size, &data->data)) != 0)
+				goto err;
+			data->ulen = data->size;
+		}
+		goto err;
+	}
+
+	if (data->ulen < MEGABYTE) {
+		if ((data->data = realloc(
+		    data->data, data->ulen = MEGABYTE)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+	}
+
+	if ((ret = __dbc_get_blob_size(local, &blob_size)) != 0)
+		goto err;
+
+	if (keyflag && !is_heap && (ret = __db_prdbt(
+	    key, checkprint, " ", handle, callback, 0, 0, 0)) != 0)
+		goto err;
+
+	if ((ret = __db_prblob_id(local->dbp, blob_id, blob_size,
+	    data, checkprint, prefix, handle, callback)) != 0)
+		goto err;
+
+	/* Move the cursor. */
+	ret = __dbc_get(dbc, key, &partial, DB_NEXT);
+
+err:	if (local != NULL) {
+		if ((t_ret = __dbc_close(local)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+
+	return (ret);
+}
+
+/*
+ * __db_prblob_id --
+ *	Print blob_size bytes of blob file blob_id, data->ulen at a time.
+ */
+static int
+__db_prblob_id(dbp, blob_id,
+    blob_size, data, checkprint, prefix, handle, callback)
+	DB *dbp;
+	db_seq_t blob_id;
+	off_t blob_size;
+	DBT *data;
+	int checkprint;
+	const char *prefix;
+	void *handle;
+	int (*callback) __P((void *, const void *));
+{
+	DB_FH *fhp;
+	int ret, skip_newline, t_ret;
+	off_t left, offset;
+
+	fhp = NULL;
+	offset = 0;
+
+	if ((ret = __blob_file_open(
+	    dbp, &fhp, blob_id, DB_FOP_READONLY, 1)) != 0)
+		goto err;
+
+	left = blob_size;
+	while (left > 0) {
+		if ((ret = __blob_file_read(
+		    dbp->env, fhp, data, offset, data->ulen)) != 0)
+			goto err;
+		/* No bytes read means no progress: fail, don't spin. */
+		if (data->size == 0) {
+			ret = EIO;
+			goto err;
+		}
+		skip_newline = data->size < left ? 1 : 0;
+		if ((ret = __db_prdbt(data, checkprint,
+		    offset == 0 ? prefix : NULL,
+		    handle, callback, 0, 0, skip_newline)) != 0)
+			goto err;
+		if (data->size > left)
+			left = 0;
+		else
+			left = left - data->size;
+		offset = offset + data->size;
+	}
+
+err:	if (fhp != NULL &&
+	    (t_ret = __os_closehandle(dbp->env, fhp)) != 0 && ret == 0)
+		ret = t_ret;
+
+	return (ret);
+}
+
+/*
  * __db_prdbt --
  *	Print out a DBT data element.
  *
  * PUBLIC: int __db_prdbt __P((DBT *, int, const char *, void *,
- * PUBLIC:     int (*)(void *, const void *), int, int));
+ * PUBLIC:     int (*)(void *, const void *), int, int, int));
  */
 int
-__db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
+__db_prdbt(dbtp, checkprint,
+    prefix, handle, callback, is_recno, is_heap, no_newline)
 	DBT *dbtp;
 	int checkprint;
 	const char *prefix;
@@ -1380,16 +1605,17 @@ __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
 	int (*callback) __P((void *, const void *));
 	int is_recno;
 	int is_heap;
+	int no_newline;
 {
-	static const u_char hex[] = "0123456789abcdef";
 	db_recno_t recno;
 	DB_HEAP_RID rid;
-	size_t len;
+	size_t count, len;
 	int ret;
+	u_int8_t *p;
 #define	DBTBUFLEN	100
-	u_int8_t *p, *hp;
-	char buf[DBTBUFLEN], hbuf[DBTBUFLEN];
+	char buf[DBTBUFLEN], hexbuf[2 * DBTBUFLEN + 1];
 
+	ret = 0;
 	/*
 	 * !!!
 	 * This routine is the routine that dumps out items in the format
@@ -1409,13 +1635,8 @@ __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
 
 		/* If we're printing data as hex, print keys as hex too. */
 		if (!checkprint) {
-			for (len = strlen(buf), p = (u_int8_t *)buf,
-			    hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
-				*hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
-				*hp++ = hex[*p & 0x0f];
-			}
-			*hp = '\0';
-			ret = callback(handle, hbuf);
+			(void)__db_tohex(buf, strlen(buf), hexbuf);
+			ret = callback(handle, hexbuf);
 		} else
 			ret = callback(handle, buf);
 
@@ -1433,44 +1654,46 @@ __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
 
 		/* If we're printing data as hex, print keys as hex too. */
 		if (!checkprint) {
-			for (len = strlen(buf), p = (u_int8_t *)buf,
-			    hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
-				*hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
-				*hp++ = hex[*p & 0x0f];
-			}
-			*hp = '\0';
-			ret = callback(handle, hbuf);
+			(void)__db_tohex(buf, strlen(buf), hexbuf);
+			ret = callback(handle, hexbuf);
 		} else
 			ret = callback(handle, buf);
 
 		if (ret != 0)
 			return (ret);
 	} else if (checkprint) {
+		/*
+		 * Prepare buf for the 'isprint()' case: printable single char
+		 * strings; prepare hexbuf for the other case '\<2 hex digits>'.
+		 */
+		buf[1] = '\0';
+		hexbuf[0] = '\\';
 		for (len = dbtp->size, p = dbtp->data; len--; ++p)
 			if (isprint((int)*p)) {
 				if (*p == '\\' &&
 				    (ret = callback(handle, "\\")) != 0)
 					return (ret);
-				snprintf(buf, DBTBUFLEN, "%c", *p);
+				buf[0] = (char)*p;
 				if ((ret = callback(handle, buf)) != 0)
 					return (ret);
 			} else {
-				snprintf(buf, DBTBUFLEN, "\\%c%c",
-				    hex[(u_int8_t)(*p & 0xf0) >> 4],
-				    hex[*p & 0x0f]);
-				if ((ret = callback(handle, buf)) != 0)
+				(void)__db_tohex(p, 1, hexbuf + 1);
+				if ((ret = callback(handle, hexbuf)) != 0)
 					return (ret);
 			}
 	} else
-		for (len = dbtp->size, p = dbtp->data; len--; ++p) {
-			snprintf(buf, DBTBUFLEN, "%c%c",
-			    hex[(u_int8_t)(*p & 0xf0) >> 4],
-			    hex[*p & 0x0f]);
-			if ((ret = callback(handle, buf)) != 0)
+		for (len = dbtp->size, p = dbtp->data, count = DBTBUFLEN;
+		     len > 0; len -= count, p += count) {
+			if (count > len)
+				count = len;
+			(void)__db_tohex(p, count, hexbuf);
+			if ((ret = callback(handle, hexbuf)) != 0)
 				return (ret);
 		}
-
-	return (callback(handle, "\n"));
+	if (no_newline == 0)
+		return (callback(handle, "\n"));
+	else
+		return (ret);
 }
 
 /*
@@ -1598,7 +1821,7 @@ __db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno)
 			goto err;
 		DB_INIT_DBT(dbt, subname, strlen(subname));
 		if ((ret = __db_prdbt(&dbt, 1,
-		    NULL, handle, callback, 0, 0)) != 0)
+		    NULL, handle, callback, 0, 0, 0)) != 0)
 			goto err;
 	}
 	switch (dbtype) {
@@ -1868,7 +2091,7 @@ __db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno)
 				goto err;
 			for (i = 0; i < tmp_u_int32 - 1; i++)
 			    if ((ret = __db_prdbt(&keys[i],
-				pflag, " ", handle, callback, 0, 0)) != 0)
+				pflag, " ", handle, callback, 0, 0, 0)) != 0)
 					goto err;
 		}
 	}
@@ -1954,3 +2177,33 @@ __db_dbtype_to_string(type)
 	}
 	return ("UNKNOWN TYPE");
 }
+
+/*
+ * __db_tohex --
+ *	Write len bytes from source into dest as lowercase hex digits, two
+ *	per byte, and return dest.  The destination must be at least
+ *	2*len + 1 bytes long: a '\0' terminator is always added.
+ *
+ * PUBLIC: char *__db_tohex __P((const void *, size_t, char *));
+ */
+char *
+__db_tohex(source, len, dest)
+	const void *source;
+	size_t  len;
+	char *dest;
+{
+	static const char hex[] = "0123456789abcdef";
+	const u_int8_t *s;
+	char *d;
+
+	s = source;
+	d = dest;
+	while (len > 0) {
+	    *d++ = hex[(*s & 0xf0) >> 4];	/* High nibble first. */
+	    *d++ = hex[*s & 0x0f];		/* Then the low nibble. */
+	    s++;
+	    len--;
+	}
+	*d = '\0';
+	return ((char *)dest);
+}
diff --git a/src/db/db_rec.c b/src/db/db_rec.c
index 8ba1124e..98b29b22 100644
--- a/src/db/db_rec.c
+++ b/src/db/db_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -1194,8 +1194,9 @@ __db_pg_init_recover(env, dbtp, lsnp, op, info)
 	DB_LSN copy_lsn;
 	DB_MPOOLFILE *mpf;
 	PAGE *pagep;
-	int cmp_n, cmp_p, ret, type;
+	int cmp_n, cmp_p, ret, t_ret, type;
 
+	pagep = NULL;
 	ip = ((DB_TXNHEAD *)info)->thread_info;
 	REC_PRINT(__db_pg_init_print);
 	REC_INTRO(__db_pg_init_read, ip, 0);
@@ -1247,11 +1248,12 @@ __db_pg_init_recover(env, dbtp, lsnp, op, info)
 			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
 			     argp->data.data, argp->data.size);
 	}
-	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
-		goto out;
 
 done:	*lsnp = argp->prev_lsn;
 out:
+	if (pagep != NULL && (t_ret =
+	     __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0 && ret == 0)
+	    	ret = t_ret;
 	REC_CLOSE;
 }
 
diff --git a/src/db/db_reclaim.c b/src/db/db_reclaim.c
index b902769a..abae33d9 100644
--- a/src/db/db_reclaim.c
+++ b/src/db/db_reclaim.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -181,6 +181,7 @@ __db_truncate_callback(dbc, p, cookie, putp)
 			switch (*H_PAIRDATA(dbp, p, indx)) {
 			case H_OFFDUP:
 				break;
+			case H_BLOB:
 			case H_OFFPAGE:
 			case H_KEYDATA:
 				++*countp;
diff --git a/src/db/db_remove.c b/src/db/db_remove.c
index 591a29b2..d6118fae 100644
--- a/src/db/db_remove.c
+++ b/src/db/db_remove.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -18,7 +18,7 @@
 #include "dbinc/txn.h"
 
 static int __db_dbtxn_remove __P((DB *,
-    DB_THREAD_INFO *, DB_TXN *, const char *, const char *));
+    DB_THREAD_INFO *, DB_TXN *, const char *, const char *, APPNAME));
 static int __db_subdb_remove __P((DB *,
     DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t));
 
@@ -264,7 +264,7 @@ __db_remove_int(dbp, ip, txn, name, subdb, flags)
 
 	/* Handle transactional file removes separately. */
 	if (IS_REAL_TXN(txn)) {
-		ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb);
+		ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb, DB_APP_DATA);
 		goto err;
 	}
 
@@ -293,6 +293,10 @@ __db_remove_int(dbp, ip, txn, name, subdb, flags)
 	    (ret = dbp->db_am_remove(dbp, ip, NULL, name, subdb, flags)) != 0)
 		goto err;
 
+	if (dbp->db_am_remove == NULL &&
+	    (ret = __blob_del_all(dbp, txn, 0)) != 0)
+		goto err;
+
 	ret = F_ISSET(dbp, DB_AM_INMEM) ?
 	    __db_inmem_remove(dbp, NULL, real_name) :
 	    __fop_remove(env,
@@ -407,6 +411,10 @@ __db_subdb_remove(dbp, ip, txn, name, subdb, flags)
 	    txn, name, subdb, DB_UNKNOWN, DB_WRITEOPEN, 0, PGNO_BASE_MD)) != 0)
 		goto err;
 
+	if (sdbp->blob_threshold != 0)
+		if ((ret = __blob_del_all(sdbp, txn, 0)) != 0)
+			goto err;
+
 	DB_TEST_RECOVERY(sdbp, DB_TEST_PREDESTROY, ret, name);
 
 	/* Have the handle locked so we will not lock pages. */
@@ -460,18 +468,21 @@ err:
 }
 
 static int
-__db_dbtxn_remove(dbp, ip, txn, name, subdb)
+__db_dbtxn_remove(dbp, ip, txn, name, subdb, appname)
 	DB *dbp;
 	DB_THREAD_INFO *ip;
 	DB_TXN *txn;
 	const char *name, *subdb;
+	APPNAME appname;
 {
 	ENV *env;
 	int ret;
 	char *tmpname;
+	u_int32_t flags;
 
 	env = dbp->env;
 	tmpname = NULL;
+	flags = DB_NOSYNC;
 
 	/*
 	 * This is a transactional remove, so we have to keep the name
@@ -488,7 +499,12 @@ __db_dbtxn_remove(dbp, ip, txn, name, subdb)
 	DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name);
 
 	if ((ret = __db_rename_int(dbp,
-	    txn->thread_info, txn, name, subdb, tmpname, DB_NOSYNC)) != 0)
+	    txn->thread_info, txn, name, subdb, tmpname, flags)) != 0)
+		goto err;
+
+	/* Delete all blob files, if this database supports blobs. */
+	if (appname != DB_APP_BLOB && (dbp->blob_file_id != 0 ||
+	    dbp->blob_sdb_id != 0) && (ret = __blob_del_all(dbp, txn, 0)) != 0)
 		goto err;
 
 	/*
@@ -501,7 +517,7 @@ __db_dbtxn_remove(dbp, ip, txn, name, subdb)
 	ret = F_ISSET(dbp, DB_AM_INMEM) ?
 	     __db_inmem_remove(dbp, txn, tmpname) :
 	    __fop_remove(env,
-	    txn, dbp->fileid, tmpname, &dbp->dirname, DB_APP_DATA,
+	    txn, dbp->fileid, tmpname, &dbp->dirname, appname,
 	    F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);
 
 	DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name);
diff --git a/src/db/db_rename.c b/src/db/db_rename.c
index 2812b948..5b2bed42 100644
--- a/src/db/db_rename.c
+++ b/src/db/db_rename.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -285,10 +285,11 @@ __db_rename_int(dbp, ip, txn, name, subdb, newname, flags)
 	 * taken care of in the fop layer.
 	 */
 	if (IS_REAL_TXN(txn)) {
-		if ((ret = __fop_dummy(dbp, txn, old, newname)) != 0)
+		if ((ret =
+		    __fop_dummy(dbp, txn, old, newname, DB_APP_DATA)) != 0)
 			goto err;
 	} else {
-		if ((ret = __fop_dbrename(dbp, old, newname)) != 0)
+		if ((ret = __fop_dbrename(dbp, old, newname, DB_APP_DATA)) != 0)
 			goto err;
 	}
 
diff --git a/src/db/db_ret.c b/src/db/db_ret.c
index 709605f6..ddd0ef51 100644
--- a/src/db/db_ret.c
+++ b/src/db/db_ret.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -29,18 +29,27 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
 	void **memp;
 	u_int32_t *memsize;
 {
+	BBLOB bl;
 	BKEYDATA *bk;
 	BOVERFLOW *bo;
 	DB *dbp;
+	ENV *env;
+	HBLOB hblob;
+	HEAPBLOBHDR bhdr;
 	HEAPHDR *hdr;
+	db_seq_t blob_id;
+	int ret;
 	HOFFPAGE ho;
+	off_t blob_size;
 	u_int32_t len;
 	u_int8_t *hk;
 	void *data;
 
 	if (F_ISSET(dbt, DB_DBT_READONLY))
 		return (0);
+	ret = 0;
 	dbp = dbc->dbp;
+	env = dbp->env;
 
 	switch (TYPE(h)) {
 	case P_HASH_UNSORTED:
@@ -50,6 +59,20 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
 			memcpy(&ho, hk, sizeof(HOFFPAGE));
 			return (__db_goff(dbc, dbt,
 			    ho.tlen, ho.pgno, memp, memsize));
+		} else if (HPAGE_PTYPE(hk) == H_BLOB) {
+			/* Get the record instead of the blob item. */
+			if (F_ISSET(dbt, DB_DBT_BLOB_REC)) {
+				data = P_ENTRY(dbp, h, indx);
+				len = HBLOB_SIZE;
+				break;
+			}
+			memcpy(&hblob, hk, HBLOB_SIZE);
+			blob_id = (db_seq_t)hblob.id;
+			GET_BLOB_SIZE(env, hblob, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			return (__blob_get(
+			    dbc, dbt, blob_id, blob_size, memp, memsize));
 		}
 		len = LEN_HKEYDATA(dbp, h, dbp->pgsize, indx);
 		data = HKEYDATA_DATA(hk);
@@ -58,6 +81,21 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
 		hdr = (HEAPHDR *)P_ENTRY(dbp, h, indx);
 		if (F_ISSET(hdr,(HEAP_RECSPLIT | HEAP_RECFIRST)))
 			return (__heapc_gsplit(dbc, dbt, memp, memsize));
+		else if (F_ISSET(hdr, HEAP_RECBLOB)) {
+			/* Get the record instead of the blob item. */
+			if (F_ISSET(dbt, DB_DBT_BLOB_REC)) {
+				data = P_ENTRY(dbp, h, indx);
+				len = HEAPBLOBREC_SIZE;
+				break;
+			}
+			memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+			blob_id = (db_seq_t)bhdr.id;
+			GET_BLOB_SIZE(env, bhdr, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			return (__blob_get(
+			    dbc, dbt, blob_id, blob_size, memp, memsize));
+		}
 		len = hdr->size;
 		data = (u_int8_t *)hdr + sizeof(HEAPHDR);
 		break;
@@ -69,6 +107,20 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
 			bo = (BOVERFLOW *)bk;
 			return (__db_goff(dbc, dbt,
 			    bo->tlen, bo->pgno, memp, memsize));
+		} else if (B_TYPE(bk->type) == B_BLOB) {
+			/* Get the record instead of the blob item. */
+			if (F_ISSET(dbt, DB_DBT_BLOB_REC)) {
+				data = P_ENTRY(dbp, h, indx);
+				len = BBLOB_SIZE;
+				break;
+			}
+			memcpy(&bl, bk, BBLOB_SIZE);
+			blob_id = (db_seq_t)bl.id;
+			GET_BLOB_SIZE(env, bl, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			return (__blob_get(
+			    dbc, dbt, blob_id, blob_size, memp, memsize));
 		}
 		len = bk->len;
 		data = bk->data;
@@ -167,3 +219,71 @@ __db_retcopy(env, dbt, data, len, memp, memsize)
 
 	return (ret);
 }
+
+/*
+ * __db_dbt_clone --
+ *	Clone a DBT from another DBT: src->size bytes of src->data are copied
+ * into newly allocated memory and dest is marked DB_DBT_USERMEM.
+ * The input dest DBT must be zero initialized.  The function does not
+ * allocate dest itself, to allow cloning into stack or local variables.  It
+ * is the caller's responsibility to free the memory allocated in dest->data.
+ *
+ * PUBLIC: int __db_dbt_clone __P((ENV *, DBT *, const DBT *));
+ */
+int
+__db_dbt_clone(env, dest, src)
+	ENV *env;
+	DBT *dest;
+	const DBT *src;
+{
+	u_int32_t err_flags;
+	int ret;
+
+	DB_ASSERT(env, dest->data == NULL);
+
+	ret = 0;
+
+	/* Reject src DBTs carrying flags this function does not support. */
+	err_flags = DB_DBT_MALLOC | DB_DBT_REALLOC |
+	    DB_DBT_MULTIPLE | DB_DBT_PARTIAL;
+	if (F_ISSET(src, err_flags)) {
+		__db_errx(env, DB_STR("0758",
+		    "Unsupported flags when cloning the DBT."));
+		return (EINVAL);
+	}
+
+	if ((ret = __os_malloc(env, src->size, &dest->data)) != 0)
+		return (ret);
+
+	memcpy(dest->data, src->data, src->size);
+	dest->ulen = src->size;
+	dest->size = src->size;
+	dest->flags = DB_DBT_USERMEM;
+
+	return (ret);
+}
+
+/*
+ * __db_dbt_clone_free --
+ *	Free a DBT cloned by __db_dbt_clone: release data, zero size/ulen.
+ *
+ * PUBLIC: int __db_dbt_clone_free __P((ENV *, DBT *));
+ */
+int
+__db_dbt_clone_free(env, dbt)
+	ENV *env;
+	DBT *dbt;
+{
+	/* Currently only DB_DBT_USERMEM is supported; else fail, free nothing. */
+	if (dbt->flags != DB_DBT_USERMEM) {
+		__db_errx(env, DB_STR("0759",
+		    "Unsupported flags when freeing the cloned DBT."));
+		return (EINVAL);
+	}
+
+	if (dbt->data != NULL)
+		__os_free(env, dbt->data);
+	dbt->size = dbt->ulen = 0;	/* NOTE: dbt->data is not cleared. */
+
+	return (0);
+}
diff --git a/src/db/db_setid.c b/src/db/db_setid.c
index 697c3ff7..5c61a139 100644
--- a/src/db/db_setid.c
+++ b/src/db/db_setid.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_setlsn.c b/src/db/db_setlsn.c
index 1a3280ed..acee80f6 100644
--- a/src/db/db_setlsn.c
+++ b/src/db/db_setlsn.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_sort_multiple.c b/src/db/db_sort_multiple.c
index c5e2e941..7facb80e 100644
--- a/src/db/db_sort_multiple.c
+++ b/src/db/db_sort_multiple.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #include "db_config.h"
@@ -34,7 +34,7 @@ __db_compare_both(db, akey, adata, bkey, bdata)
 
 	t = (BTREE *)db->bt_internal;
 
-	cmp = t->bt_compare(db, akey, bkey);
+	cmp = t->bt_compare(db, akey, bkey, NULL);
 	if (cmp != 0) return cmp;
 	if (!F_ISSET(db, DB_AM_DUPSORT))
 	    return (0);
@@ -44,9 +44,9 @@ __db_compare_both(db, akey, adata, bkey, bdata)
 
 #ifdef HAVE_COMPRESSION
 	if (DB_IS_COMPRESSED(db))
-		return t->compress_dup_compare(db, adata, bdata);
+		return t->compress_dup_compare(db, adata, bdata, NULL);
 #endif
-	return db->dup_compare(db, adata, bdata);
+	return db->dup_compare(db, adata, bdata, NULL);
 }
 
 #define	DB_SORT_SWAP(a, ad, b, bd)					\
diff --git a/src/db/db_stati.c b/src/db/db_stati.c
index 61744e81..b7367f37 100644
--- a/src/db/db_stati.c
+++ b/src/db/db_stati.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_truncate.c b/src/db/db_truncate.c
index 0eeb0c64..d57a23b2 100644
--- a/src/db/db_truncate.c
+++ b/src/db/db_truncate.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -191,6 +191,10 @@ __db_truncate(dbp, ip, txn, countp)
 	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
 		ret = t_ret;
 
+	/* Delete all blob files. */
+	if (ret == 0)
+		ret = __blob_del_all(dbp, txn, 1);
+
 	DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, NULL);
 
 DB_TEST_RECOVERY_LABEL
diff --git a/src/db/db_upg.c b/src/db/db_upg.c
index de5d0dc7..7dcc3b1c 100644
--- a/src/db/db_upg.c
+++ b/src/db/db_upg.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -13,6 +13,7 @@
 #include "dbinc/db_swap.h"
 #include "dbinc/btree.h"
 #include "dbinc/hash.h"
+#include "dbinc/heap.h"
 #include "dbinc/qam.h"
 
 /*
@@ -98,6 +99,27 @@ static int (* const func_46_list[P_PAGETYPE_MAX])
 	NULL,			/* P_IHEAP */
 };
 
+static int (* const func_60_list[P_PAGETYPE_MAX])
+    __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
+	NULL,			/* P_INVALID */
+	NULL,			/* __P_DUPLICATE */
+	NULL,			/* P_HASH_UNSORTED */
+	NULL,			/* P_IBTREE */
+	NULL,			/* P_IRECNO */
+	__bam_60_lbtree,	/* P_LBTREE */
+	NULL,			/* P_LRECNO */
+	NULL,			/* P_OVERFLOW */
+	__ham_60_hashmeta,	/* P_HASHMETA */
+	__bam_60_btreemeta,	/* P_BTREEMETA */
+	NULL,			/* P_QAMMETA */
+	NULL,			/* P_QAMDATA */
+	NULL,			/* P_LDUP */
+	__ham_60_hash,		/* P_HASH */
+	__heap_60_heapmeta,	/* P_HEAPMETA */
+	__heap_60_heap,		/* P_HEAP */
+	NULL,			/* P_IHEAP */
+};	/* Passed to __db_page_pass; one slot per page type. */
+
 static int __db_page_pass __P((DB *, char *, u_int32_t, int (* const [])
 	       (DB *, char *, u_int32_t, DB_FH *, PAGE *, int *), DB_FH *));
 static int __db_set_lastpgno __P((DB *, char *, DB_FH *));
@@ -181,6 +203,34 @@ __db_upgrade(dbp, fname, flags)
 				goto err;
 			/* FALLTHROUGH */
 		case 9:
+			/*
+			 * Various blob ids and size use two u_int32_t values
+			 * to represent 64 bit integers in early 6.0.  Change
+			 * those values to 64 bit integers.
+			 */
+			/*
+			 * Read the encrypt_alg and chksum fields from the
+			 * metadata page.
+			 */
+			meta = (DBMETA *)mbuf;
+			if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+				F_SET(dbp, DB_AM_CHKSUM);
+			if (meta->encrypt_alg != 0) {
+				if (!CRYPTO_ON(dbp->env)) {
+					__db_errx(env, DB_STR("0777",
+"Attempt to upgrade an encrypted database without providing a password."));
+					ret = EINVAL;
+					goto err;
+				}
+				F_SET(dbp, DB_AM_ENCRYPT);
+			}
+			memcpy(&dbp->pgsize,
+			    &meta->pagesize, sizeof(u_int32_t));
+			if ((ret = __db_page_pass(dbp,
+			    real_name, flags, func_60_list, fhp)) != 0)
+				goto err;
+			/* FALLTHROUGH */
+		case 10:
 			break;
 		default:
 			__db_errx(env, DB_STR_A("0666",
@@ -307,6 +357,34 @@ __db_upgrade(dbp, fname, flags)
 
 			/* FALLTHROUGH */
 		case 9:
+			/*
+			 * Various blob ids and size use two u_int32_t values
+			 * to represent 64 bit integers in early 6.0.  Change
+			 * those values to 64 bit integers.
+			 */
+			meta = (DBMETA*)mbuf;
+			memcpy(&dbp->pgsize,
+			    &meta->pagesize, sizeof(u_int32_t));
+			/*
+			 * Read the encrypt_alg and chksum fields from the
+			 * metadata page.
+			 */
+			if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+				F_SET(dbp, DB_AM_CHKSUM);
+			if (meta->encrypt_alg != 0) {
+				if (!CRYPTO_ON(dbp->env)) {
+					__db_errx(env, DB_STR("0778",
+"Attempt to upgrade an encrypted database without providing a password."));
+					ret = EINVAL;
+					goto err;
+				}
+				F_SET(dbp, DB_AM_ENCRYPT);
+			}
+			if ((ret = __db_page_pass(dbp,
+			    real_name, flags, func_60_list, fhp)) != 0)
+				goto err;
+			/* FALLTHROUGH */
+		case 10:
 			break;
 		default:
 			__db_errx(env, DB_STR_A("0668",
@@ -317,9 +395,45 @@ __db_upgrade(dbp, fname, flags)
 		}
 		break;
 	case DB_HEAPMAGIC:
-		/*
-		 * There's no upgrade needed for Heap yet.
-		 */
+		switch (((DBMETA *)mbuf)->version) {
+		case 1:
+			/*
+			 * Various blob ids and size use two u_int32_t values
+			 * to represent 64 bit integers in early 6.0.  Change
+			 * those values to 64 bit integers.
+			 */
+			meta = (DBMETA*)mbuf;
+			memcpy(&dbp->pgsize,
+			    &meta->pagesize, sizeof(u_int32_t));
+			/*
+			 * Read the encrypt_alg and chksum fields from the
+			 * metadata page.
+			 */
+			if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+				F_SET(dbp, DB_AM_CHKSUM);
+			if (meta->encrypt_alg != 0) {
+				if (!CRYPTO_ON(dbp->env)) {
+					__db_errx(env, DB_STR("0779",
+"Attempt to upgrade an encrypted database without providing a password."));
+					ret = EINVAL;
+					goto err;
+				}
+				F_SET(dbp, DB_AM_ENCRYPT);
+			}
+			if ((ret = __db_page_pass(dbp,
+			    real_name, flags, func_60_list, fhp)) != 0)
+				goto err;
+			/* FALLTHROUGH */
+		case 2:
+			break;
+		default:
+			__db_errx(env, DB_STR_A("0776",
+			    "%s: unsupported heap version: %lu",
+			    "%s %lu"), real_name,
+			    (u_long)((DBMETA *)mbuf)->version);
+			ret = DB_OLD_VERSION;
+			goto err;
+		}
 		break;
 	case DB_QAMMAGIC:
 		switch (((DBMETA *)mbuf)->version) {
diff --git a/src/db/db_upg_opd.c b/src/db/db_upg_opd.c
index 992115ad..6f6dfb71 100644
--- a/src/db/db_upg_opd.c
+++ b/src/db/db_upg_opd.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -37,6 +37,9 @@ static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t));
  * __db_31_offdup --
  *	Convert 3.0 off-page duplicates to 3.1 off-page duplicates.
  *
+ *	This code and its descendants should be removed when support for
+ *	upgrading from a 3.0 database format is removed.
+ *
  * PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *));
  */
 int
@@ -317,7 +320,7 @@ __db_build_ri(dbp, fhp, ipage, page, indx, nomemp)
 
 /*
  * __db_up_ovref --
- *	Increment/decrement the reference count on an overflow page.
+ *	Increment the reference count on an overflow page.
  */
 static int
 __db_up_ovref(dbp, fhp, pgno)
diff --git a/src/db/db_vrfy.c b/src/db/db_vrfy.c
index 9cb94ad2..a8c80cae 100644
--- a/src/db/db_vrfy.c
+++ b/src/db/db_vrfy.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -553,7 +553,7 @@ __db_vrfy_pagezero(dbp, vdp, fhp, name, flags)
 	if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
 		return (ret);
 
-	if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
+	if ((ret = __db_chk_meta(env, dbp, meta, DB_CHK_META)) != 0) {
 		EPRINT((env, DB_STR_A("0522",
 		    "Page %lu: metadata page corrupted", "%lu"),
 		    (u_long)PGNO_BASE_MD));
@@ -920,7 +920,7 @@ err1:			if (ret == 0)
 	 * If we've seen a Queue metadata page, we may need to walk Queue
 	 * extent pages that won't show up between 0 and vdp->last_pgno.
 	 */
-	if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
+	if (F_ISSET(vdp, SALVAGE_QMETA_SET) && (t_ret =
 	    __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
 		if (ret == 0)
 			ret = t_ret;
@@ -1563,6 +1563,10 @@ __db_vrfy_meta(dbp, vdp, meta, pgno, flags)
 	 * If we don't have FTRUNCATE then mpool could include some
 	 * zeroed pages at the end of the file, we assume the meta page
 	 * is correct.  Queue does not update the meta page's last_pgno.
+	 *
+	 * We have seen one false positive after a failure while rolling the log
+	 * forward, last_pgno was updated and the file had not yet been
+	 * extended.  [#18418]
 	 */
 	if (pgno == PGNO_BASE_MD &&
 	    dbtype != DB_QUEUE && meta->last_pgno != vdp->last_pgno) {
@@ -2401,6 +2405,15 @@ __db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
 		 * length, so it's not possible to certify it as safe.
 		 */
 		switch (B_TYPE(bk->type)) {
+		case B_BLOB:
+			len = bk->len;
+			if (len != BBLOB_DSIZE) {
+				EPRINT((env, DB_STR_A("0771",
+				    "Page %lu: item %lu illegal size.",
+				    "%lu %lu"), (u_long)pgno, (u_long)i));
+				return (DB_VERIFY_BAD);
+			}
+			break;
 		case B_KEYDATA:
 			len = bk->len;
 			break;
diff --git a/src/db/db_vrfy_stub.c b/src/db/db_vrfy_stub.c
index 5037f33e..a9eed84c 100644
--- a/src/db/db_vrfy_stub.c
+++ b/src/db/db_vrfy_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/db/db_vrfyutil.c b/src/db/db_vrfyutil.c
index d72e1188..3a64bd50 100644
--- a/src/db/db_vrfyutil.c
+++ b/src/db/db_vrfyutil.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -43,6 +43,9 @@ __db_vrfy_dbinfo_create(env, ip, pgsize, vdpp)
 	if ((ret = __db_create_internal(&cdbp, env, 0)) != 0)
 		goto err;
 
+	if ((ret = __db_set_blob_threshold(cdbp, 0, 0)) != 0)
+		goto err;
+
 	if ((ret = __db_set_flags(cdbp, DB_DUP)) != 0)
 		goto err;
 
@@ -60,6 +63,9 @@ __db_vrfy_dbinfo_create(env, ip, pgsize, vdpp)
 	if ((ret = __db_create_internal(&pgdbp, env, 0)) != 0)
 		goto err;
 
+	if ((ret = __db_set_blob_threshold(pgdbp, 0, 0)) != 0)
+		goto err;
+
 	if ((ret = __db_set_pagesize(pgdbp, pgsize)) != 0)
 		goto err;
 
@@ -928,5 +934,6 @@ __db_vrfy_prdbt(dbtp, checkprint, prefix,
 	}
 	return (
 	    __db_prdbt(dbtp, checkprint,
-	    prefix, handle, callback, is_recno, is_heap));
+	    prefix, handle, callback, is_recno, is_heap,
+	    vdp != NULL && F_ISSET(vdp, SALVAGE_STREAM_BLOB) ? 1 : 0));
 }
diff --git a/src/db/partition.c b/src/db/partition.c
index f8beaf16..86491ba3 100644
--- a/src/db/partition.c
+++ b/src/db/partition.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -32,13 +32,12 @@ static int __partc_writelock __P((DBC*));
 static int __partition_chk_meta __P((DB *,
 		DB_THREAD_INFO *, DB_TXN *, u_int32_t));
 static int __partition_setup_keys __P((DBC *,
-		DB_PARTITION *, DBMETA *, u_int32_t));
+		DB_PARTITION *, u_int32_t, u_int32_t));
 static int __part_key_cmp __P((const void *, const void *));
 static inline void __part_search __P((DB *,
 		DB_PARTITION *, DBT *, u_int32_t *));
 
-static char *Alloc_err = DB_STR_A("0644",
-    "Partition open failed to allocate %d bytes", "%d");
+#define	ALLOC_ERR DB_STR_A("0764","Partition failed to allocate %d bytes","%d")
 
 /*
  * Allocate a partition cursor and copy flags to the partition cursor.
@@ -70,20 +69,27 @@ static inline void __part_search(dbp, part, key, part_idp)
 {
 	db_indx_t base, indx, limit;
 	int cmp;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
+	size_t pos, pos_h, pos_l;
 
 	DB_ASSERT(dbp->env, part->nparts != 0);
 	COMPQUIET(cmp, 0);
 	COMPQUIET(indx, 0);
 
+	pos_h = 0;
+	pos_l = 0;
 	func = ((BTREE *)dbp->bt_internal)->bt_compare;
 	DB_BINARY_SEARCH_FOR(base, limit, part->nparts, O_INDX) {
+		pos = pos_l > pos_h ? pos_h : pos_l;
 		DB_BINARY_SEARCH_INCR(indx, base, limit, O_INDX);
-		cmp = func(dbp, key, &part->keys[indx]);
+		cmp = func(dbp, key, &part->keys[indx], &pos);
 		if (cmp == 0)
 			break;
-		if (cmp > 0)
+		if (cmp > 0) {
 			DB_BINARY_SEARCH_SHIFT_BASE(indx, base, limit, O_INDX);
+			pos_l = pos;
+		} else
+			pos_h = pos;
 	}
 	if (cmp == 0)
 		*part_idp = indx;
@@ -146,7 +152,8 @@ __partition_set(dbp, parts, keys, callback)
 {
 	DB_PARTITION *part;
 	ENV *env;
-	int ret;
+	u_int32_t i;
+	int ret, t_ret;
 
 	DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition");
 	env = dbp->dbenv->env;
@@ -155,6 +162,11 @@ __partition_set(dbp, parts, keys, callback)
 		__db_errx(env, DB_STR("0646",
 		    "Must specify at least 2 partitions."));
 		return (EINVAL);
+	} else if (parts > PART_MAXIMUM) {
+		__db_errx(env, DB_STR_A("0772",
+		    "Must not specify more than %u partitions.", "%u"),
+		    (unsigned int)PART_MAXIMUM);
+		return (EINVAL);
 	}
 
 	if (keys == NULL && callback == NULL) {
@@ -178,11 +190,59 @@ bad:		__db_errx(env, DB_STR("0648",
 	    (part->callback != NULL && keys != NULL))
 		goto bad;
 
+	/*
+	 * Free a key array that was allocated by an earlier set_partition call.
+	 */
+	if (part->keys != NULL) {
+		for (i = 0; i < part->nparts - 1; i++) {
+			/*
+			 * Always free all entries in the key array and return
+			 * the first error code.
+			 */
+			if ((t_ret = __db_dbt_clone_free(dbp->env,
+			    &part->keys[i])) != 0 && ret == 0)
+				ret = t_ret;
+		}
+		__os_free(dbp->env, part->keys);
+		part->keys = NULL;
+	}
+
+	if (ret != 0)
+		return (ret);
+
 	part->nparts = parts;
-	part->keys = keys;
 	part->callback = callback;
 
-	return (0);
+	/*
+	 * Take a copy of the user's key array; otherwise we cannot be sure
+	 * that the memory will still be valid when the database is opened.
+	 */
+	if (keys != NULL) {
+		if ((ret = __os_calloc(dbp->env,
+		    part->nparts - 1, sizeof(DBT), &part->keys)) != 0)
+			goto err;
+
+		for (i = 0, parts = 0; i < part->nparts - 1; i++, parts++)
+			if ((ret = __db_dbt_clone(dbp->env,
+			    &part->keys[i], &keys[i])) != 0)
+				goto err;
+	}
+
+err:	if (ret != 0 && part->keys != NULL) {
+		/*
+		 * Always free those entries cloned successfully in the key
+		 * array and the one which fails in __db_dbt_clone, and
+		 * return the first error code.  Since ret != 0 here, it is
+		 * safe to ignore any error from __db_dbt_clone_free.
+		 */
+		for (i = 0; i < parts; i++)
+			(void)__db_dbt_clone_free(dbp->env, &part->keys[i]);
+		if (parts < part->nparts - 1 && part->keys[parts].data != NULL)
+			__os_free(dbp->env, part->keys[parts].data);
+		__os_free(dbp->env, part->keys);
+		part->keys = NULL;
+	}
+	return (ret);
 }
 
 /*
@@ -288,15 +348,16 @@ __partition_open(dbp, ip, txn, fname, type, flags, mode, do_open)
 
 	if ((ret = __os_calloc(env,
 	     part->nparts, sizeof(*part->handles), &part->handles)) != 0) {
-		__db_errx(env,
-		    Alloc_err, part->nparts * sizeof(*part->handles));
+		__db_errx(env, ALLOC_ERR,
+		    (int)(part->nparts * sizeof(*part->handles)));
 		goto err;
 	}
 
 	DB_ASSERT(env, fname != NULL);
 	if ((ret = __os_malloc(env,
 	     strlen(fname) + PART_LEN + 1, &name)) != 0) {
-		__db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1);
+		__db_errx(env, ALLOC_ERR,
+		    (int)(strlen(fname) + PART_LEN + 1));
 		goto err;
 	}
 
@@ -330,6 +391,9 @@ __partition_open(dbp, ip, txn, fname, type, flags, mode, do_open)
 		part_db->dup_compare = dbp->dup_compare;
 		part_db->app_private = dbp->app_private;
 		part_db->api_internal = dbp->api_internal;
+		part_db->blob_threshold = dbp->blob_threshold;
+		part_db->blob_file_id = dbp->blob_file_id;
+		part_db->blob_sdb_id = dbp->blob_sdb_id;
 
 		if (dbp->type == DB_BTREE)
 			__bam_copy_config(dbp, part_db, part->nparts);
@@ -388,7 +452,8 @@ __partition_chk_meta(dbp, ip, txn, flags)
 	DB_MPOOLFILE *mpf;
 	ENV *env;
 	db_pgno_t base_pgno;
-	int ret, t_ret;
+	int ret, set_keys, t_ret;
+	u_int32_t pgsize;
 
 	dbc = NULL;
 	meta = NULL;
@@ -397,6 +462,14 @@ __partition_chk_meta(dbp, ip, txn, flags)
 	mpf = dbp->mpf;
 	env = dbp->env;
 	ret = 0;
+	set_keys = 0;
+
+	/*
+	 * Just to fix the lint warning.
+	 * The real value will be set later, and we will
+	 * only use the value after being set properly.
+	 */
+	pgsize = dbp->pgsize;
 
 	/* Get a cursor on the main db.  */
 	dbp->p_internal = NULL;
@@ -475,10 +548,12 @@ __partition_chk_meta(dbp, ip, txn, flags)
 		}
 	} else if (meta->magic != DB_BTREEMAGIC) {
 		__db_errx(env, DB_STR("0658",
-		    "Partitioning only supported on BTREE nad HASH."));
+		    "Partitioning only supported on BTREE and HASH."));
 		ret = EINVAL;
-	} else
-		ret = __partition_setup_keys(dbc, part, meta, flags);
+	} else {
+		set_keys = 1;
+		pgsize = meta->pagesize;
+	}
 
 err:	/* Put the metadata page back. */
 	if (meta != NULL && (t_ret = __memp_fput(mpf,
@@ -487,6 +562,15 @@ err:	/* Put the metadata page back. */
 	if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
 		ret = t_ret;
 
+	/*
+	 * We can only call __partition_setup_keys after putting
+	 * the meta page and releasing the meta lock, or self-deadlock
+	 * will occur.
+	 */
+	if (ret == 0 && set_keys && (t_ret =
+	    __partition_setup_keys(dbc, part, pgsize, flags)) != 0)
+		ret = t_ret;
+
 	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
 		ret = t_ret;
 
@@ -502,7 +586,7 @@ err:	/* Put the metadata page back. */
 struct key_sort {
 	DB *dbp;
 	DBT *key;
-	int (*compare) __P((DB *, const DBT *, const DBT *));
+	int (*compare) __P((DB *, const DBT *, const DBT *, size_t *));
 };
 
 static int __part_key_cmp(a, b)
@@ -512,7 +596,7 @@ static int __part_key_cmp(a, b)
 
 	ka = a;
 	kb = b;
-	return (ka->compare(ka->dbp, ka->key, kb->key));
+	return (ka->compare(ka->dbp, ka->key, kb->key, NULL));
 }
 /*
  * __partition_setup_keys --
@@ -520,25 +604,22 @@ static int __part_key_cmp(a, b)
  * are creating a partitioned database.
  */
 static int
-__partition_setup_keys(dbc, part, meta, flags)
+__partition_setup_keys(dbc, part, pgsize, flags)
 	DBC *dbc;
 	DB_PARTITION *part;
-	DBMETA *meta;
-	u_int32_t flags;
+	u_int32_t flags, pgsize;
 {
 	BTREE *t;
 	DB *dbp;
-	DBT data, key, *keys, *kp;
+	DBT data, key, *keys, *kp, *okp;
 	ENV *env;
-	u_int32_t ds, i, j;
-	u_int8_t *dd;
+	db_pgno_t last_pgno;
+	u_int32_t cgetflags, i, j;
+	size_t dsize;
 	struct key_sort *ks;
-	int have_keys, ret;
-	int (*compare) __P((DB *, const DBT *, const DBT *));
-	void *dp;
+	int have_keys, ret, t_ret;
+	int (*compare) __P((DB *, const DBT *, const DBT *, size_t *));
 
-	COMPQUIET(dd, NULL);
-	COMPQUIET(ds, 0);
 	memset(&data, 0, sizeof(data));
 	memset(&key, 0, sizeof(key));
 	ks = NULL;
@@ -549,6 +630,9 @@ __partition_setup_keys(dbc, part, meta, flags)
 	/* Need to just read the main database. */
 	dbp->p_internal = NULL;
 	have_keys = 0;
+	dsize = 0;
+
+	keys = part->keys;
 
 	/* First verify that things what we expect. */
 	if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != 0) {
@@ -581,11 +665,15 @@ __partition_setup_keys(dbc, part, meta, flags)
 	}
 
 	if (LF_ISSET(DB_CREATE) && have_keys == 0) {
-		/* Insert the keys into the master database. */
+		/*
+		 * Insert the keys into the master database.  We will also
+		 * compute the total size of the keys for later use.
+		 */
 		for (i = 0; i < part->nparts - 1; i++) {
 			if ((ret = __db_put(dbp, dbc->thread_info,
 			    dbc->txn, &part->keys[i], &data, 0)) != 0)
 				    goto err;
+			dsize += part->keys[i].size;
 		}
 
 		/*
@@ -604,39 +692,71 @@ __partition_setup_keys(dbc, part, meta, flags)
 	}
 done:	if (F_ISSET(part, PART_RANGE)) {
 		/*
-		 * Allocate one page to hold the keys plus space at the
-		 * end of the buffer to put an array of DBTs.  If there
-		 * is not enough space __dbc_get will return how much
-		 * is needed and we realloc.
+		 * If we just did the insert, we already know the total size of
+		 * the keys.  Otherwise the keys are already in the database,
+		 * and we can calculate the size by checking the last pgno of
+		 * the corresponding mpoolfile.
+		 *
+		 * We make the size aligned at 1024 for performance.
 		 */
+		if (dsize == 0) {
+			ret = __memp_get_last_pgno(dbp->mpf, &last_pgno);
+			if (ret != 0)
+				goto err;
+			if (last_pgno > 1)
+				last_pgno--;
+			dsize = last_pgno * pgsize;
+		}
+		dsize = DB_ALIGN(dsize, 1024);
+
 		if ((ret = __os_malloc(env,
-		    meta->pagesize + (sizeof(DBT) * part->nparts),
+		    dsize + (sizeof(DBT) * part->nparts),
 		    &part->data)) != 0) {
-			__db_errx(env, Alloc_err, meta->pagesize);
+			__db_errx(env, ALLOC_ERR, (int)dsize);
 			goto err;
 		}
+		memset(part->data, 0,
+		    dsize + (sizeof(DBT) * part->nparts));
+
+		kp = okp = (DBT *)
+		    ((u_int8_t *)part->data + dsize);
 		memset(&key, 0, sizeof(key));
 		memset(&data, 0, sizeof(data));
-		data.data = part->data;
-		data.ulen = meta->pagesize;
 		data.flags = DB_DBT_USERMEM;
-again:		if ((ret = __dbc_get(dbc, &key, &data,
-		     DB_FIRST | DB_MULTIPLE_KEY)) == DB_BUFFER_SMALL) {
-			if ((ret = __os_realloc(env,
-			      data.size + (sizeof(DBT) * part->nparts),
-			      &part->data)) != 0)
+		j = 0;
+		cgetflags = DB_FIRST;
+		while ((ret = __dbc_get(dbc, &key, &data, cgetflags)) == 0) {
+			/* Only nparts DBT slots exist; any more is an error. */
+			if ((u_int32_t)(kp - okp) >= part->nparts) {
+				ret = EINVAL;
 				goto err;
-			data.data = part->data;
-			data.ulen = data.size;
-			goto again;
+			}
+			kp->size = key.size;
+			kp->data = (u_int8_t *)part->data + j;
+			/* It is an error if the keys overflow the space. */
+			if (j + kp->size > dsize) {
+				ret = EINVAL;
+				goto err;
+			}
+			memcpy(kp->data, key.data, kp->size);
+			j += kp->size;
+			cgetflags = DB_NEXT;
+			kp++;
 		}
+
+		/*
+		 * We should get part->nparts keys back, otherwise it means
+		 * the passed-in keys are not valid.
+		 */
+		if (ret == DB_NOTFOUND && (u_int32_t)(kp - okp) == part->nparts)
+			ret = 0;
+
 		if (ret == 0) {
 			/*
 			 * They passed in keys, they must match.
 			 */
-			keys = NULL;
 			compare = NULL;
-			if (have_keys == 1 && (keys = part->keys) != NULL) {
+			if (have_keys == 1 && keys != NULL) {
 				t = dbc->dbp->bt_internal;
 				compare = t->bt_compare;
 				if ((ret = __os_malloc(env, (part->nparts - 1)
@@ -651,20 +771,15 @@ again:		if ((ret = __dbc_get(dbc, &key, &data,
 				qsort(ks, (size_t)part->nparts - 1,
 				    sizeof(struct key_sort), __part_key_cmp);
 			}
-			DB_MULTIPLE_INIT(dp, &data);
 			part->keys = (DBT *)
-			    ((u_int8_t *)part->data + data.size);
+			    ((u_int8_t *)part->data + dsize);
+			F_SET(part, PART_KEYS_SETUP);
 			j = 0;
 			for (kp = part->keys;
 			    kp < &part->keys[part->nparts]; kp++, j++) {
-				DB_MULTIPLE_KEY_NEXT(dp,
-				     &data, kp->data, kp->size, dd, ds);
-				if (dp == NULL) {
-					ret = DB_NOTFOUND;
-					break;
-				}
-				if (keys != NULL && j != 0 &&
-				    compare(dbc->dbp, ks[j - 1].key, kp) != 0) {
+				if (have_keys == 1 && keys != NULL && j != 0 &&
+				    compare(dbc->dbp, ks[j - 1].key,
+				    kp, NULL) != 0) {
 					if (kp->data == NULL &&
 					    F_ISSET(dbp, DB_AM_RECOVER))
 						goto err;
@@ -683,6 +798,24 @@ again:		if ((ret = __dbc_get(dbc, &key, &data,
 err:	dbp->p_internal = part;
 	if (ks != NULL)
 		__os_free(env, ks);
+
+	/*
+	 * We only free the original copy of the key array when
+	 * the keys have been set up properly; otherwise we let
+	 * the close function free the memory.
+	 */
+	if (keys != NULL && F_ISSET(part, PART_KEYS_SETUP)) {
+		for (i = 0; i < part->nparts - 1; i++)
+			/*
+			 * Always free all entries in the key array and return
+			 * the first error code.
+			 */
+			if ((t_ret = __db_dbt_clone_free(env,
+			    &keys[i])) != 0 && ret == 0)
+				ret = t_ret;
+		__os_free(env, keys);
+	}
+
 	return (ret);
 }
 
@@ -1183,6 +1316,15 @@ __partition_close(dbp, txn, flags)
 				ret = t_ret;
 		__os_free(env, part->handles);
 	}
+	if (!F_ISSET(part, PART_KEYS_SETUP) && part->keys != NULL) {
+		for (i = 0; i < part->nparts - 1; i++) {
+			if (part->keys[i].data != NULL && (t_ret =
+			    __db_dbt_clone_free(env, &part->keys[i])) != 0 &&
+			    ret == 0)
+				ret = t_ret;
+		}
+		__os_free(env, part->keys);
+	}
 	if (part->dirs != NULL)
 		__os_free(env, (char **)part->dirs);
 	if (part->data != NULL)
@@ -1471,7 +1613,8 @@ __part_fileid_reset(env, ip, fname, nparts, encrypted)
 
 	if ((ret = __os_malloc(env,
 	     strlen(fname) + PART_LEN + 1, &name)) != 0) {
-		__db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1);
+		__db_errx(env, ALLOC_ERR,
+		    (int)(strlen(fname) + PART_LEN + 1));
 		return (ret);
 	}
 
@@ -1747,7 +1890,8 @@ __part_rr(dbp, ip, txn, name, subdb, newname, flags)
 	COMPQUIET(np, NULL);
 	if (newname != NULL && (ret = __os_malloc(env,
 	     strlen(newname) + PART_LEN + 1, &np)) != 0) {
-		__db_errx(env, Alloc_err, strlen(newname) + PART_LEN + 1);
+		__db_errx(env, ALLOC_ERR,
+		    (int)(strlen(newname) + PART_LEN + 1));
 		goto err;
 	}
 	for (i = 0; i < part->nparts; i++, pdbp++) {
@@ -1790,6 +1934,32 @@ err:		/*
 	}
 	return (ret);
 }
+
+/*
+ * __partc_dup --
+ *	Duplicate a partitioned-database cursor: dbc_orig into dbc_n.
+ *
+ * PUBLIC: int __partc_dup __P((DBC *, DBC *));
+ */
+int
+__partc_dup(dbc_orig, dbc_n)
+	DBC *dbc_orig;
+	DBC *dbc_n;
+{
+	PART_CURSOR *orig, *new;
+
+	orig = (PART_CURSOR *)dbc_orig->internal;
+	new = (PART_CURSOR *)dbc_n->internal;
+
+	/*
+	 * A cursor on a partitioned database contains the identifier
+	 * of the underlying database and a regular cursor that points
+	 * to the underlying database.  Copy the id, dup the sub-cursor.
+	 */
+	new->part_id = orig->part_id;
+
+	return (__dbc_dup(orig->sub_cursor, &new->sub_cursor, DB_POSITION));
+}
 #ifdef HAVE_VERIFY
 /*
  * __part_verify --
diff --git a/src/dbinc/atomic.h b/src/dbinc/atomic.h
index 096176a5..61f2ead9 100644
--- a/src/dbinc/atomic.h
+++ b/src/dbinc/atomic.h
@@ -1,7 +1,7 @@
 /*
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2009, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2009, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -79,12 +79,11 @@ typedef struct {
 #define	WINCE_ATOMIC_MAGIC(p)						\
 	/*								\
 	 * Memory mapped regions on Windows CE cause problems with	\
-	 * InterlockedXXX calls. Each page in a mapped region needs to	\
-	 * have been written to prior to an InterlockedXXX call, or the	\
-	 * InterlockedXXX call hangs. This does not seem to be		\
-	 * documented anywhere. For now, read/write a non-critical	\
-	 * piece of memory from the shared region prior to attempting	\
-	 * shared region prior to attempting an InterlockedExchange	\
+	 * InterlockedXXX calls. Each process making an InterlockedXXX	\
+	 * call must make sure that it has written to the page prior to	\
+	 * the call, or the InterlockedXXX call hangs. This does not	\
+	 * seem to be documented anywhere. Write to a non-critical	\
+	 * piece of memory in the shared region prior to attempting an	\
 	 * InterlockedXXX operation.					\
 	 */								\
 	(p)->dummy = 0
@@ -144,7 +143,7 @@ typedef LONG volatile *interlocked_val;
 #define	atomic_inc(env, p)	__atomic_inc(p)
 #define	atomic_dec(env, p)	__atomic_dec(p)
 #define	atomic_compare_exchange(env, p, o, n)	\
-	__atomic_compare_exchange((p), (o), (n))
+	__atomic_compare_exchange_int((p), (o), (n))
 static inline int __atomic_inc(db_atomic_t *p)
 {
 	int	temp;
@@ -176,7 +175,7 @@ static inline int __atomic_dec(db_atomic_t *p)
  * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
  * which configure could be changed to use.
  */
-static inline int __atomic_compare_exchange(
+static inline int __atomic_compare_exchange_int(
 	db_atomic_t *p, atomic_value_t oldval, atomic_value_t newval)
 {
 	atomic_value_t was;
diff --git a/src/dbinc/blob.h b/src/dbinc/blob.h
new file mode 100644
index 00000000..f4ff475b
--- /dev/null
+++ b/src/dbinc/blob.h
@@ -0,0 +1,103 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2013, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef	_DB_BLOB_H_
+#define	_DB_BLOB_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * How many characters can the path for a blob file use?
+ * Up to 6 subdirectory separators.
+ * Up to 6 directory names of up to three characters each.
+ * Up to 21 characters for blob_id identifier.
+ * 7 characters for the standard prefix (__db.bl)
+ * 1 for luck (or the terminating NUL)
+ * The largest blob id, 9,223,372,036,854,775,807 would
+ * produce a path and file name:
+ * 009/223/372/036/854/775/807/__db.bl009223372036854775807
+ */
+#define MAX_BLOB_PATH	"009/223/372/036/854/775/807/__db.bl009223372036854775807"
+#define	MAX_BLOB_PATH_SZ	sizeof(MAX_BLOB_PATH)
+#define	BLOB_DEFAULT_DIR	"__db_bl"
+#define	BLOB_META_FILE_NAME	"__db_blob_meta.db"
+#define	BLOB_DIR_PREFIX		"__db"
+#define	BLOB_FILE_PREFIX	"__db.bl"
+
+#define	BLOB_DIR_ELEMS		1000
+
+#define IS_BLOB_META(name)						\
+    (name != NULL && strstr(name, BLOB_META_FILE_NAME) != NULL)
+#define IS_BLOB_FILE(name)						\
+    (name != NULL && strstr(name, BLOB_FILE_PREFIX) != NULL)
+
+/*
+ * Combines two unsigned 32 bit integers into a 64 bit integer.
+ * Blob database file ids and sub database ids are 64 bit integers,
+ * but have to be stored on database metadata pages that must
+ * be readable on 32 bit only compilers.  So the ids are split into
+ * two 32 bit integers, and combined when needed.
+ */
+#define GET_LO_HI(e, lo, hi, o, ret)	do {				\
+	DB_ASSERT((e), sizeof(o) <= 8);					\
+	if (sizeof(o) == 8) {						\
+		(o) = (hi);						\
+		(o) = ((o) << 32);					\
+		(o) += (lo);						\
+	} else {							\
+		if ((hi) > 0) {						\
+			__db_errx((e), DB_STR("0765",			\
+			    "Offset or id size overflow."));		\
+			(ret) = EINVAL;					\
+		}							\
+		(o) = (lo);						\
+	}								\
+} while (0);
+
+#define GET_BLOB_FILE_ID(e, p, o, ret)					\
+	GET_LO_HI(e, (p)->blob_file_lo, (p)->blob_file_hi, o, ret);
+
+#define GET_BLOB_SDB_ID(e, p, o, ret)					\
+	GET_LO_HI(e, (p)->blob_sdb_lo, (p)->blob_sdb_hi, o, ret);
+
+/* Splits a 64 bit integer into two unsigned 32 bit integers. */
+#define SET_LO_HI(p, v, type, field_lo, field_hi)	do {		\
+	u_int32_t tmp;							\
+	if (sizeof((v)) == 8) {						\
+		tmp = (u_int32_t)((v) >> 32);				\
+		memcpy(((u_int8_t *)p) + SSZ(type, field_hi),		\
+		    &tmp, sizeof(u_int32_t));				\
+	} else {							\
+		memset(((u_int8_t *)p) + SSZ(type, field_hi),		\
+		    0, sizeof(u_int32_t));				\
+	}								\
+	tmp = (u_int32_t)(v);						\
+	memcpy(((u_int8_t *)p) + SSZ(type, field_lo),			\
+	    &tmp, sizeof(u_int32_t));					\
+} while (0);
+
+#define SET_LO_HI_VAR(v, field_lo, field_hi)	do {			\
+	if (sizeof((v)) == 8)						\
+		field_hi = (u_int32_t)((v) >> 32);			\
+	else								\
+		field_hi = 0;						\
+	field_lo = (u_int32_t)(v);					\
+} while (0);
+
+#define SET_BLOB_META_FILE_ID(p, v, type)					\
+	SET_LO_HI(p, v, type, blob_file_lo, blob_file_hi);
+
+#define SET_BLOB_META_SDB_ID(p, v, type)					\
+	SET_LO_HI(p, v, type, blob_sdb_lo, blob_sdb_hi);
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_BLOB_H_ */
diff --git a/src/dbinc/btree.h b/src/dbinc/btree.h
index 86bbec14..a8b9e1ee 100644
--- a/src/dbinc/btree.h
+++ b/src/dbinc/btree.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -472,7 +472,7 @@ struct __btree {			/* Btree access method. */
 	u_int32_t bt_minkey;		/* Minimum keys per page. */
 
 					/* Btree comparison function. */
-	int (*bt_compare) __P((DB *, const DBT *, const DBT *));
+	int (*bt_compare) __P((DB *, const DBT *, const DBT *, size_t *));
 					/* Btree prefix function. */
 	size_t (*bt_prefix) __P((DB *, const DBT *, const DBT *));
 					/* Btree compress function. */
@@ -483,7 +483,8 @@ struct __btree {			/* Btree access method. */
 	int (*bt_decompress) __P((DB *, const DBT *, const DBT *, DBT *, DBT *,
 					 DBT *));
 					/* dup_compare for compression */
-	int (*compress_dup_compare) __P((DB *, const DBT *, const DBT *));
+	int (*compress_dup_compare) __P((DB *, const DBT *, const DBT *,
+	    size_t *));
 #endif
 
 					/* Recno access method. */
@@ -539,7 +540,7 @@ typedef enum {
  * Flags for __bam_pinsert.
  */
 #define	BPI_SPACEONLY	0x01		/* Only check for space to update. */
-#define	BPI_NORECNUM	0x02		/* Not update the recnum on the left. */
+#define	BPI_NORECNUM	0x02		/* Don't update the left's recnum. */
 #define	BPI_NOLOGGING	0x04		/* Don't log the update. */
 #define	BPI_REPLACE	0x08		/* Replace the record. */
 
diff --git a/src/dbinc/clock.h b/src/dbinc/clock.h
index caeaee70..b2815ea2 100644
--- a/src/dbinc/clock.h
+++ b/src/dbinc/clock.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -125,6 +125,13 @@ typedef struct {
 		timespecadd((vvp), &__tmp);			        \
 	} while (0)
 
+#define	TIMESPEC_SUB_DB_TIMEOUT(vvp, t)					\
+	do {								\
+		db_timespec __tmp;					\
+		DB_TIMEOUT_TO_TIMESPEC(t, &__tmp);			\
+		timespecsub((vvp), &__tmp);				\
+	} while (0)
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/src/dbinc/crypto.h b/src/dbinc/crypto.h
index ea7a9cf0..4d889fd9 100644
--- a/src/dbinc/crypto.h
+++ b/src/dbinc/crypto.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/cxx_int.h b/src/dbinc/cxx_int.h
index 5492ead7..368bac86 100644
--- a/src/dbinc/cxx_int.h
+++ b/src/dbinc/cxx_int.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/db.in b/src/dbinc/db.in
index a948910e..b592b746 100644
--- a/src/dbinc/db.in
+++ b/src/dbinc/db.in
@@ -1,7 +1,7 @@
 /*
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  *
@@ -102,6 +102,7 @@ extern "C" {
 
 @FILE_t_decl@
 @off_t_decl@
+@db_off_t_decl@
 @pid_t_decl@
 @size_t_decl@
 #ifdef HAVE_MIXED_SIZE_ADDRESSING
@@ -131,9 +132,9 @@ typedef	u_int16_t	db_indx_t;	/* Page offset type. */
 #define	DB_MAX_PAGES	0xffffffff	/* >= # of pages in a file */
 
 typedef	u_int32_t	db_recno_t;	/* Record number type. */
-#define	DB_MAX_RECORDS	0xffffffff	/* >= # of records in a tree */
+#define	DB_MAX_RECORDS	0xffffffff	/* >= # of records in a recno tree. */
 
-typedef u_int32_t	db_timeout_t;	/* Type of a timeout. */
+typedef u_int32_t	db_timeout_t;	/* Type of a timeout in microseconds. */
 
 /*
  * Region offsets are the difference between a pointer in a region and the
@@ -157,6 +158,10 @@ struct __db_compact;	typedef struct __db_compact DB_COMPACT;
 struct __db_dbt;	typedef struct __db_dbt DBT;
 struct __db_distab;	typedef struct __db_distab DB_DISTAB;
 struct __db_env;	typedef struct __db_env DB_ENV;
+struct __db_event_mutex_died_info;
+	typedef struct __db_event_mutex_died_info DB_EVENT_MUTEX_DIED_INFO;
+struct __db_event_failchk_info;
+	typedef struct __db_event_failchk_info DB_EVENT_FAILCHK_INFO;
 struct __db_h_stat;	typedef struct __db_h_stat DB_HASH_STAT;
 struct __db_heap_rid;	typedef struct __db_heap_rid DB_HEAP_RID;
 struct __db_heap_stat;	typedef struct __db_heap_stat DB_HEAP_STAT;
@@ -189,6 +194,7 @@ struct __db_repmgr_site;typedef struct __db_repmgr_site DB_REPMGR_SITE;
 struct __db_repmgr_stat;typedef struct __db_repmgr_stat DB_REPMGR_STAT;
 struct __db_seq_record; typedef struct __db_seq_record DB_SEQ_RECORD;
 struct __db_seq_stat;	typedef struct __db_seq_stat DB_SEQUENCE_STAT;
+struct __db_stream;	typedef struct __db_stream DB_STREAM;
 struct __db_site;	typedef struct __db_site DB_SITE;
 struct __db_sequence;	typedef struct __db_sequence DB_SEQUENCE;
 struct __db_thread_info;typedef struct __db_thread_info DB_THREAD_INFO;
@@ -226,18 +232,20 @@ struct __db_dbt {
 
 	void *app_data;
 
-#define	DB_DBT_APPMALLOC	0x001	/* Callback allocated memory. */
-#define	DB_DBT_BULK		0x002	/* Internal: Insert if duplicate. */
-#define	DB_DBT_DUPOK		0x004	/* Internal: Insert if duplicate. */
-#define	DB_DBT_ISSET		0x008	/* Lower level calls set value. */
-#define	DB_DBT_MALLOC		0x010	/* Return in malloc'd memory. */
-#define	DB_DBT_MULTIPLE		0x020	/* References multiple records. */
-#define	DB_DBT_PARTIAL		0x040	/* Partial put/get. */
-#define	DB_DBT_REALLOC		0x080	/* Return in realloc'd memory. */
-#define	DB_DBT_READONLY		0x100	/* Readonly, don't update. */
-#define	DB_DBT_STREAMING	0x200	/* Internal: DBT is being streamed. */
-#define	DB_DBT_USERCOPY		0x400	/* Use the user-supplied callback. */
-#define	DB_DBT_USERMEM		0x800	/* Return in user's memory. */
+#define	DB_DBT_APPMALLOC	0x0001	/* Callback allocated memory. */
+#define	DB_DBT_BULK		0x0002	/* Internal: Insert if duplicate. */
+#define	DB_DBT_DUPOK		0x0004	/* Internal: Insert if duplicate. */
+#define	DB_DBT_ISSET		0x0008	/* Lower level calls set value. */
+#define	DB_DBT_MALLOC		0x0010	/* Return in malloc'd memory. */
+#define	DB_DBT_MULTIPLE		0x0020	/* References multiple records. */
+#define	DB_DBT_PARTIAL		0x0040	/* Partial put/get. */
+#define	DB_DBT_REALLOC		0x0080	/* Return in realloc'd memory. */
+#define	DB_DBT_READONLY		0x0100	/* Readonly, don't update. */
+#define	DB_DBT_STREAMING	0x0200	/* Internal: DBT is being streamed. */
+#define	DB_DBT_USERCOPY		0x0400	/* Use the user-supplied callback. */
+#define	DB_DBT_USERMEM		0x0800	/* Return in user's memory. */
+#define	DB_DBT_BLOB		0x1000	/* Data item is a blob. */
+#define	DB_DBT_BLOB_REC		0x2000	/* Internal: Blob database record. */
 	u_int32_t flags;
 };
 
@@ -274,6 +282,23 @@ struct __db_mutex_stat { /* SHARED */
 #endif
 };
 
+/* Buffers passed to __mutex_describe() must be at least this large. */
+#define	DB_MUTEX_DESCRIBE_STRLEN	128
+
+/* This is the info of a DB_EVENT_MUTEX_DIED event notification. */
+struct __db_event_mutex_died_info {
+	pid_t         pid;	/* Process which last owned the mutex */
+	db_threadid_t tid;	/* Thread which last owned the mutex */
+	db_mutex_t    mutex;	/* ID of the mutex */
+	char	      desc[DB_MUTEX_DESCRIBE_STRLEN];
+};
+
+/* This is the info of a DB_EVENT_FAILCHK event notification. */
+#define DB_FAILURE_SYMPTOM_SIZE	120
+struct __db_event_failchk_info {
+	int	error;
+	char	symptom[DB_FAILURE_SYMPTOM_SIZE];
+};
 /* This is the length of the buffer passed to DB_ENV->thread_id_string() */
 #define	DB_THREADID_STRLEN	128
 
@@ -400,6 +425,8 @@ struct __db_lock_stat { /* SHARED */
 	uintmax_t st_lockers_nowait;	/* Locker lock granted without wait. */
 	uintmax_t st_region_wait;	/* Region lock granted after wait. */
 	uintmax_t st_region_nowait;	/* Region lock granted without wait. */
+	uintmax_t st_nlockers_hit;	/* Lockers found in thread info. */
+	uintmax_t st_nlockers_reused;	/* Lockers reallocated from thread info. */
 	u_int32_t st_hash_len;		/* Max length of bucket. */
 	roff_t	  st_regsize;		/* Region size. */
 #endif
@@ -469,7 +496,7 @@ struct __db_lockreq {
 /*******************************************************
  * Logging.
  *******************************************************/
-#define	DB_LOGVERSION	19		/* Current log version. */
+#define	DB_LOGVERSION	22		/* Current log version. */
 #define	DB_LOGVERSION_LATCHING 15	/* Log version using latching: db-4.8 */
 #define	DB_LOGCHKSUM	12		/* Check sum headers: db-4.5 */
 #define	DB_LOGOLDVER	8		/* Oldest version supported: db-4.2 */
@@ -595,7 +622,8 @@ typedef enum {
 	LOGREC_PGDDBT,
 	LOGREC_PGLIST,
 	LOGREC_POINTER,
-	LOGREC_TIME
+	LOGREC_TIME,
+	LOGREC_LONGARG
 } log_rec_type_t;
 
 typedef const struct __log_rec_spec {
@@ -755,6 +783,7 @@ struct __db_mpool_stat { /* SHARED */
 	uintmax_t st_mvcc_frozen;	/* Buffers frozen. */
 	uintmax_t st_mvcc_thawed;	/* Buffers thawed. */
 	uintmax_t st_mvcc_freed;	/* Frozen buffers freed. */
+	uintmax_t st_mvcc_reused;	/* Outdated invisible buffers reused. */
 	uintmax_t st_alloc;		/* Number of page allocations. */
 	uintmax_t st_alloc_buckets;	/* Buckets checked during allocation. */
 	uintmax_t st_alloc_max_buckets;/* Max checked during allocation. */
@@ -762,6 +791,8 @@ struct __db_mpool_stat { /* SHARED */
 	uintmax_t st_alloc_max_pages;	/* Max checked during allocation. */
 	uintmax_t st_io_wait;		/* Thread waited on buffer I/O. */
 	uintmax_t st_sync_interrupted;	/* Number of times sync interrupted. */
+	u_int32_t st_oddfsize_detect;	/* Odd file size detected. */
+	u_int32_t st_oddfsize_resolve;	/* Odd file size resolved. */
 	roff_t	  st_regsize;		/* Region size. */
 	roff_t	  st_regmax;		/* Region max. */
 #endif
@@ -956,7 +987,7 @@ struct __db_txn {
 #define	TXN_SNAPSHOT		0x08000	/* Snapshot Isolation. */
 #define	TXN_SYNC		0x10000	/* Write and sync on prepare/commit. */
 #define	TXN_WRITE_NOSYNC	0x20000	/* Write only on prepare/commit. */
-#define TXN_BULK		0x40000 /* Enable bulk loading optimization. */
+#define	TXN_BULK		0x40000 /* Enable bulk loading optimization. */
 	u_int32_t	flags;
 };
 
@@ -1065,30 +1096,34 @@ struct __db_txn_token {
 
 /*
  * Event notification types.  (Tcl testing interface currently assumes there are
- * no more than 32 of these.)
+ * no more than 32 of these.)  Comments include any relevant event_info types.
  */
 #define	DB_EVENT_PANIC			 0
-#define	DB_EVENT_REG_ALIVE		 1
-#define	DB_EVENT_REG_PANIC		 2
-#define	DB_EVENT_REP_CLIENT		 3
-#define	DB_EVENT_REP_CONNECT_BROKEN	 4
-#define	DB_EVENT_REP_CONNECT_ESTD	 5
-#define	DB_EVENT_REP_CONNECT_TRY_FAILED	 6
-#define	DB_EVENT_REP_DUPMASTER		 7
-#define	DB_EVENT_REP_ELECTED		 8
-#define	DB_EVENT_REP_ELECTION_FAILED	 9
-#define	DB_EVENT_REP_INIT_DONE		10
-#define	DB_EVENT_REP_JOIN_FAILURE	11
-#define	DB_EVENT_REP_LOCAL_SITE_REMOVED	12
-#define	DB_EVENT_REP_MASTER		13
-#define	DB_EVENT_REP_MASTER_FAILURE	14
-#define	DB_EVENT_REP_NEWMASTER		15
-#define	DB_EVENT_REP_PERM_FAILED	16
-#define	DB_EVENT_REP_SITE_ADDED		17
-#define	DB_EVENT_REP_SITE_REMOVED	18
-#define	DB_EVENT_REP_STARTUPDONE	19
-#define	DB_EVENT_REP_WOULD_ROLLBACK	20	/* Undocumented; C API only. */
-#define	DB_EVENT_WRITE_FAILED		21
+#define	DB_EVENT_REG_ALIVE		 1	/* int: pid which was in env */
+#define	DB_EVENT_REG_PANIC		 2	/* int: error causing the panic. */
+#define	DB_EVENT_REP_AUTOTAKEOVER_FAILED 3
+#define	DB_EVENT_REP_CLIENT		 4
+#define	DB_EVENT_REP_CONNECT_BROKEN	 5	/* DB_REPMGR_CONN_ERR */
+#define	DB_EVENT_REP_CONNECT_ESTD	 6	/* int: EID of remote site */
+#define	DB_EVENT_REP_CONNECT_TRY_FAILED	 7	/* DB_REPMGR_CONN_ERR */
+#define	DB_EVENT_REP_DUPMASTER		 8
+#define	DB_EVENT_REP_ELECTED		 9
+#define	DB_EVENT_REP_ELECTION_FAILED	10
+#define	DB_EVENT_REP_INIT_DONE		11
+#define	DB_EVENT_REP_INQUEUE_FULL	12
+#define	DB_EVENT_REP_JOIN_FAILURE	13
+#define	DB_EVENT_REP_LOCAL_SITE_REMOVED	14
+#define	DB_EVENT_REP_MASTER		15
+#define	DB_EVENT_REP_MASTER_FAILURE	16
+#define	DB_EVENT_REP_NEWMASTER		17	/* int: new master's site id */
+#define	DB_EVENT_REP_PERM_FAILED	18
+#define	DB_EVENT_REP_SITE_ADDED		19	/* int: eid */
+#define	DB_EVENT_REP_SITE_REMOVED	20	/* int: eid */
+#define	DB_EVENT_REP_STARTUPDONE	21
+#define	DB_EVENT_REP_WOULD_ROLLBACK	22	/* Undocumented; C API only. */
+#define	DB_EVENT_WRITE_FAILED		23
+#define	DB_EVENT_MUTEX_DIED		24	/* DB_EVENT_MUTEX_DIED_INFO */
+#define	DB_EVENT_FAILCHK_PANIC		25	/* DB_EVENT_FAILCHK_INFO */
 #define	DB_EVENT_NO_SUCH_EVENT		 0xffffffff /* OOB sentinel value */
 
 /* Replication Manager site status. */
@@ -1102,6 +1137,7 @@ struct __db_repmgr_site {
 	u_int32_t status;
 
 #define	DB_REPMGR_ISPEER	0x01
+#define	DB_REPMGR_ISVIEW	0x02
 	u_int32_t flags;
 };
 
@@ -1117,6 +1153,7 @@ struct __db_rep_stat { /* SHARED */
 	 * circumstances, garbaged).
 	 */
 	u_int32_t st_startup_complete;	/* Site completed client sync-up. */
+	u_int32_t st_view;		/* Site is a view. */
 #ifndef __TEST_DB_NO_STATISTICS
 	uintmax_t st_log_queued;	/* Log records currently queued.+ */
 	u_int32_t st_status;		/* Current replication status. */
@@ -1194,6 +1231,7 @@ struct __db_rep_stat { /* SHARED */
 	/* Undocumented statistics only used by the test system. */
 #ifdef	CONFIG_TEST
 	u_int32_t st_filefail_cleanups;	/* # of FILE_FAIL cleanups done. */
+	uintmax_t st_log_futuredup;	/* Future log records that are dups. */
 #endif
 #endif
 };
@@ -1204,10 +1242,18 @@ struct __db_repmgr_stat { /* SHARED */
 	uintmax_t st_msgs_queued;	/* # msgs queued for network delay. */
 	uintmax_t st_msgs_dropped;	/* # msgs discarded due to excessive
 					   queue length. */
+	u_int32_t st_incoming_queue_gbytes;	/* Incoming queue size: GB. */
+	u_int32_t st_incoming_queue_bytes;	/* Incoming queue size: B. */
+	uintmax_t st_incoming_msgs_dropped;	/* # of msgs discarded due to
+						   incoming queue full. */
 	uintmax_t st_connection_drop;	/* Existing connections dropped. */
 	uintmax_t st_connect_fail;	/* Failed new connection attempts. */
-	uintmax_t st_elect_threads;	/* # of active election threads. */
-	uintmax_t st_max_elect_threads;	/* Max concurrent e-threads ever. */
+	u_int32_t st_elect_threads;	/* # of active election threads. */
+	u_int32_t st_max_elect_threads;	/* Max concurrent e-threads ever. */
+	u_int32_t st_site_participants;	/* # of repgroup participant sites. */
+	u_int32_t st_site_total;	/* # of repgroup total sites. */
+	u_int32_t st_site_views;	/* # of repgroup view sites. */
+	uintmax_t st_takeovers;		/* # of automatic listener takeovers. */
 };
 
 /* Replication Manager connection error. */
@@ -1238,7 +1284,7 @@ struct __db_sequence {
 	db_mutex_t	mtx_seq;	/* Mutex if sequence is threaded. */
 	DB_SEQ_RECORD	*seq_rp;	/* Pointer to current data. */
 	DB_SEQ_RECORD	seq_record;	/* Data from DB_SEQUENCE. */
-	int32_t		seq_cache_size; /* Number of values cached. */
+	u_int32_t	seq_cache_size; /* Number of values cached. */
 	db_seq_t	seq_last_value;	/* Last value cached. */
 	db_seq_t	seq_prev_value;	/* Last value returned. */
 	DBT		seq_key;	/* DBT pointing to sequence key. */
@@ -1250,8 +1296,8 @@ struct __db_sequence {
 	/* DB_SEQUENCE PUBLIC HANDLE LIST BEGIN */
 	int		(*close) __P((DB_SEQUENCE *, u_int32_t));
 	int		(*get) __P((DB_SEQUENCE *,
-			      DB_TXN *, int32_t, db_seq_t *, u_int32_t));
-	int		(*get_cachesize) __P((DB_SEQUENCE *, int32_t *));
+			      DB_TXN *, u_int32_t, db_seq_t *, u_int32_t));
+	int		(*get_cachesize) __P((DB_SEQUENCE *, u_int32_t *));
 	int		(*get_db) __P((DB_SEQUENCE *, DB **));
 	int		(*get_flags) __P((DB_SEQUENCE *, u_int32_t *));
 	int		(*get_key) __P((DB_SEQUENCE *, DBT *));
@@ -1261,7 +1307,7 @@ struct __db_sequence {
 	int		(*open) __P((DB_SEQUENCE *,
 			    DB_TXN *, DBT *, u_int32_t));
 	int		(*remove) __P((DB_SEQUENCE *, DB_TXN *, u_int32_t));
-	int		(*set_cachesize) __P((DB_SEQUENCE *, int32_t));
+	int		(*set_cachesize) __P((DB_SEQUENCE *, u_int32_t));
 	int		(*set_flags) __P((DB_SEQUENCE *, u_int32_t));
 	int		(*set_range) __P((DB_SEQUENCE *, db_seq_t, db_seq_t));
 	int		(*stat) __P((DB_SEQUENCE *,
@@ -1278,7 +1324,7 @@ struct __db_seq_stat { /* SHARED */
 	db_seq_t  st_last_value;	/* Last cached value. */
 	db_seq_t  st_min;		/* Minimum value. */
 	db_seq_t  st_max;		/* Maximum value. */
-	int32_t   st_cache_size;	/* Cache size. */
+	u_int32_t st_cache_size;	/* Cache size. */
 	u_int32_t st_flags;		/* Flag value. */
 };
 
@@ -1300,15 +1346,15 @@ typedef enum {
 
 #define	DB_RENAMEMAGIC	0x030800	/* File has been renamed. */
 
-#define	DB_BTREEVERSION	9		/* Current btree version. */
+#define	DB_BTREEVERSION	10		/* Current btree version. */
 #define	DB_BTREEOLDVER	8		/* Oldest btree version supported. */
 #define	DB_BTREEMAGIC	0x053162
 
-#define	DB_HASHVERSION	9		/* Current hash version. */
+#define	DB_HASHVERSION	10		/* Current hash version. */
 #define	DB_HASHOLDVER	7		/* Oldest hash version supported. */
 #define	DB_HASHMAGIC	0x061561
 
-#define	DB_HEAPVERSION	1		/* Current heap version. */
+#define	DB_HEAPVERSION	2		/* Current heap version. */
 #define	DB_HEAPOLDVER	1		/* Oldest heap version supported. */
 #define	DB_HEAPMAGIC	0x074582
 
@@ -1377,6 +1423,7 @@ typedef enum {
 #define	DB_LOCK_NOTGRANTED	(-30992)/* Lock unavailable. */
 #define	DB_LOG_BUFFER_FULL	(-30991)/* In-memory log buffer full. */
 #define	DB_LOG_VERIFY_BAD	(-30990)/* Log verification failed. */
+#define	DB_META_CHKSUM_FAIL	(-30968)/* Metadata page checksum failed. */
 #define	DB_NOSERVER		(-30989)/* Server panic return. */
 #define	DB_NOTFOUND		(-30988)/* Key/data pair not found (EOF). */
 #define	DB_OLD_VERSION		(-30987)/* Out-of-date version. */
@@ -1405,6 +1452,8 @@ typedef enum {
 #define	DB_DELETED		(-30897)/* Recovery file marked deleted. */
 #define	DB_EVENT_NOT_HANDLED	(-30896)/* Forward event to application. */
 #define	DB_NEEDSPLIT		(-30895)/* Page needs to be split. */
+#define	DB_NOINTMP		(-30886)/* Sequences not supported in temporary
+					   or in-memory databases. */
 #define	DB_REP_BULKOVF		(-30894)/* Rep bulk buffer overflow. */
 #define	DB_REP_LOGREADY		(-30893)/* Rep log ready for recovery. */
 #define	DB_REP_NEWMASTER	(-30892)/* We have learned of a new master. */
@@ -1415,6 +1464,13 @@ typedef enum {
 #define	DB_TXN_CKP		(-30888)/* Encountered ckp record in log. */
 #define	DB_VERIFY_FATAL		(-30887)/* DB->verify cannot proceed. */
 
+/*
+ * This exit status indicates that a BDB utility failed because it needed a
+ * resource which had been held by a process which crashed or otherwise did
+ * not exit cleanly.
+ */
+#define DB_EXIT_FAILCHK		3
+
 /* Database handle. */
 struct __db {
 	/*******************************************************
@@ -1426,7 +1482,7 @@ struct __db {
 					/* Callbacks. */
 	int (*db_append_recno) __P((DB *, DBT *, db_recno_t));
 	void (*db_feedback) __P((DB *, int, int));
-	int (*dup_compare) __P((DB *, const DBT *, const DBT *));
+	int (*dup_compare) __P((DB *, const DBT *, const DBT *, size_t *));
 
 	void	*app_private;		/* Application-private handle. */
 
@@ -1450,6 +1506,8 @@ struct __db {
 
 	u_int32_t adj_fileid;		/* File's unique ID for curs. adj. */
 
+	u_int32_t blob_threshold;	/* Blob threshold record size. */
+
 #define	DB_LOGFILEID_INVALID	-1
 	FNAME *log_filename;		/* File's naming info for logging. */
 
@@ -1593,6 +1651,12 @@ struct __db {
 	/* Reference to foreign -- set in the secondary. */
 	DB      *s_foreign;
 
+	DB		*blob_meta_db;	/* Database holding blob metadata. */
+	DB_SEQUENCE	*blob_seq;	/* Sequence of blob ids. */
+	char		*blob_sub_dir;	/* Subdirectory for blob files */
+	db_seq_t	blob_file_id;	/* Id of the file blob directory. */
+	db_seq_t	blob_sdb_id;	/* Id of the subdb blob directory. */
+
 	/* API-private structure: used by DB 1.85, C++, Java, Perl and Tcl */
 	void	*api_internal;
 
@@ -1623,8 +1687,11 @@ struct __db {
 		void *(**)(void *, size_t), void (**)(void *)));
 	int  (*get_append_recno) __P((DB *, int (**)(DB *, DBT *, db_recno_t)));
 	int  (*get_assoc_flags) __P((DB *, u_int32_t *));
+	int  (*get_blob_dir) __P((DB *, const char **));
+	int  (*get_blob_sub_dir) __P((DB *, const char **));
+	int  (*get_blob_threshold) __P((DB *, u_int32_t *));
 	int  (*get_bt_compare)
-		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
 	int  (*get_bt_compress) __P((DB *,
 		int (**)(DB *,
 		const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
@@ -1637,7 +1704,7 @@ struct __db {
 	int  (*get_create_dir) __P((DB *, const char **));
 	int  (*get_dbname) __P((DB *, const char **, const char **));
 	int  (*get_dup_compare)
-		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
 	int  (*get_encrypt_flags) __P((DB *, u_int32_t *));
 	DB_ENV *(*get_env) __P((DB *));
 	void (*get_errcall) __P((DB *,
@@ -1647,7 +1714,7 @@ struct __db {
 	int  (*get_feedback) __P((DB *, void (**)(DB *, int, int)));
 	int  (*get_flags) __P((DB *, u_int32_t *));
 	int  (*get_h_compare)
-		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
 	int  (*get_h_ffactor) __P((DB *, u_int32_t *));
 	int  (*get_h_hash)
 		__P((DB *, u_int32_t (**)(DB *, const void *, u_int32_t)));
@@ -1688,8 +1755,10 @@ struct __db {
 	int  (*set_alloc) __P((DB *, void *(*)(size_t),
 		void *(*)(void *, size_t), void (*)(void *)));
 	int  (*set_append_recno) __P((DB *, int (*)(DB *, DBT *, db_recno_t)));
+	int  (*set_blob_dir) __P((DB *, const char *));
+	int  (*set_blob_threshold) __P((DB *, u_int32_t, u_int32_t));
 	int  (*set_bt_compare)
-		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (*)(DB *, const DBT *, const DBT *, size_t *)));
 	int  (*set_bt_compress) __P((DB *,
 		int (*)(DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
 		int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
@@ -1699,7 +1768,7 @@ struct __db {
 	int  (*set_cachesize) __P((DB *, u_int32_t, u_int32_t, int));
 	int  (*set_create_dir) __P((DB *, const char *));
 	int  (*set_dup_compare)
-		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (*)(DB *, const DBT *, const DBT *, size_t *)));
 	int  (*set_encrypt) __P((DB *, const char *, u_int32_t));
 	void (*set_errcall) __P((DB *,
 		void (*)(const DB_ENV *, const char *, const char *)));
@@ -1708,7 +1777,7 @@ struct __db {
 	int  (*set_feedback) __P((DB *, void (*)(DB *, int, int)));
 	int  (*set_flags) __P((DB *, u_int32_t));
 	int  (*set_h_compare)
-		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+		__P((DB *, int (*)(DB *, const DBT *, const DBT *, size_t *)));
 	int  (*set_h_ffactor) __P((DB *, u_int32_t));
 	int  (*set_h_hash)
 		__P((DB *, u_int32_t (*)(DB *, const void *, u_int32_t)));
@@ -1808,13 +1877,34 @@ struct __db {
 	u_int32_t orig_flags;		   /* Flags at  open, for refresh */
 	u_int32_t flags;
 
-#define DB2_AM_EXCL		0x00000001 /* Exclusively lock the handle */ 
-#define DB2_AM_INTEXCL		0x00000002 /* Internal exclusive lock. */
-#define DB2_AM_NOWAIT		0x00000004 /* Do not wait for handle lock */ 
-	u_int32_t orig_flags2;		   /* Second flags word; for refresh */ 
+#define	DB2_AM_EXCL		0x00000001 /* Exclusively lock the handle */ 
+#define	DB2_AM_INTEXCL		0x00000002 /* Internal exclusive lock. */
+#define	DB2_AM_NOWAIT		0x00000004 /* Do not wait for handle lock */ 
 	u_int32_t flags2;		   /* Second flags word */
 };
 
+/*
+ * Stream interface for blob files; produced by the DBC db_stream method.
+ */
+struct __db_stream {
+	DBC		*dbc;	/* Cursor pointing to the db blob record. */
+	DB_FH		*fhp;	/* File handle; presumably the blob file -- confirm. */
+
+	/* DB_STREAM PUBLIC HANDLE LIST BEGIN */
+	int  (*close) __P((DB_STREAM *, u_int32_t));
+	int  (*read) __P((DB_STREAM *, DBT *, db_off_t, u_int32_t, u_int32_t));
+	int  (*size) __P((DB_STREAM *, db_off_t *, u_int32_t));
+	int  (*write) __P((DB_STREAM *, DBT *, db_off_t, u_int32_t));
+	/* DB_STREAM PUBLIC HANDLE LIST END */
+
+	u_int32_t	flags;	/* DB_STREAM_XXX flags, defined below. */
+#define	DB_STREAM_READ		0x00000001 /* Stream is read only. */
+#define	DB_STREAM_WRITE		0x00000002 /* Stream is writeable. */
+#define	DB_STREAM_SYNC_WRITE	0x00000004 /* Sync file on each write. */
+	db_seq_t	blob_id;	/* Blob id; presumably of the record at dbc. */
+	db_off_t	file_size;	/* NOTE(review): presumably blob size; confirm. */
+};
+
 /*
  * Macros for bulk operations.  These are only intended for the C API.
  * For C++, use DbMultiple*Iterator or DbMultiple*Builder.
@@ -1889,7 +1979,7 @@ struct __db {
 		pointer = __p;						\
 	} while (0)
 
-#define DB_MULTIPLE_WRITE_INIT(pointer, dbt)				\
+#define	DB_MULTIPLE_WRITE_INIT(pointer, dbt)				\
 	do {								\
 		(dbt)->flags |= DB_DBT_BULK;				\
 		pointer = (u_int8_t *)(dbt)->data +			\
@@ -1897,7 +1987,7 @@ struct __db {
 		*(u_int32_t *)(pointer) = (u_int32_t)-1;		\
 	} while (0)
 
-#define DB_MULTIPLE_RESERVE_NEXT(pointer, dbt, writedata, writedlen)	\
+#define	DB_MULTIPLE_RESERVE_NEXT(pointer, dbt, writedata, writedlen)	\
 	do {								\
 		u_int32_t *__p = (u_int32_t *)(pointer);		\
 		u_int32_t __off = ((pointer) ==	(u_int8_t *)(dbt)->data +\
@@ -1914,7 +2004,7 @@ struct __db {
 		}							\
 	} while (0)
 
-#define DB_MULTIPLE_WRITE_NEXT(pointer, dbt, writedata, writedlen)	\
+#define	DB_MULTIPLE_WRITE_NEXT(pointer, dbt, writedata, writedlen)	\
 	do {								\
 		void *__destd;						\
 		DB_MULTIPLE_RESERVE_NEXT((pointer), (dbt),		\
@@ -1925,7 +2015,7 @@ struct __db {
 			memcpy(__destd, (writedata), (writedlen));	\
 	} while (0)
 
-#define DB_MULTIPLE_KEY_RESERVE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
+#define	DB_MULTIPLE_KEY_RESERVE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
 	do {								\
 		u_int32_t *__p = (u_int32_t *)(pointer);		\
 		u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\
@@ -1948,7 +2038,7 @@ struct __db {
 		}							\
 	} while (0)
 
-#define DB_MULTIPLE_KEY_WRITE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
+#define	DB_MULTIPLE_KEY_WRITE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
 	do {								\
 		void *__destk, *__destd;				\
 		DB_MULTIPLE_KEY_RESERVE_NEXT((pointer), (dbt),		\
@@ -1962,7 +2052,7 @@ struct __db {
 		}							\
 	} while (0)
 
-#define DB_MULTIPLE_RECNO_WRITE_INIT(pointer, dbt)			\
+#define	DB_MULTIPLE_RECNO_WRITE_INIT(pointer, dbt)			\
 	do {								\
 		(dbt)->flags |= DB_DBT_BULK;				\
 		pointer = (u_int8_t *)(dbt)->data +			\
@@ -1970,7 +2060,7 @@ struct __db {
 		*(u_int32_t *)(pointer) = 0;				\
 	} while (0)
 
-#define DB_MULTIPLE_RECNO_RESERVE_NEXT(pointer, dbt, recno, writedata, writedlen) \
+#define	DB_MULTIPLE_RECNO_RESERVE_NEXT(pointer, dbt, recno, writedata, writedlen) \
 	do {								\
 		u_int32_t *__p = (u_int32_t *)(pointer);		\
 		u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\
@@ -1988,7 +2078,7 @@ struct __db {
 		}							\
 	} while (0)
 
-#define DB_MULTIPLE_RECNO_WRITE_NEXT(pointer, dbt, recno, writedata, writedlen)\
+#define	DB_MULTIPLE_RECNO_WRITE_NEXT(pointer, dbt, recno, writedata, writedlen)\
 	do {								\
 		void *__destd;						\
 		DB_MULTIPLE_RECNO_RESERVE_NEXT((pointer), (dbt),	\
@@ -2003,7 +2093,7 @@ struct __db_heap_rid {
 	db_pgno_t pgno;			/* Page number. */
 	db_indx_t indx;			/* Index in the offset table. */
 };
-#define DB_HEAP_RID_SZ	(sizeof(db_pgno_t) + sizeof(db_indx_t))
+#define	DB_HEAP_RID_SZ	(sizeof(db_pgno_t) + sizeof(db_indx_t))
 
 /*******************************************************
  * Access method cursors.
@@ -2074,6 +2164,7 @@ struct __dbc {
 	int (*close) __P((DBC *));
 	int (*cmp) __P((DBC *, DBC *, int *, u_int32_t));
 	int (*count) __P((DBC *, db_recno_t *, u_int32_t));
+	int (*db_stream) __P((DBC *, DB_STREAM **, u_int32_t));
 	int (*del) __P((DBC *, u_int32_t));
 	int (*dup) __P((DBC *, DBC **, u_int32_t));
 	int (*get) __P((DBC *, DBT *, DBT *, u_int32_t));
@@ -2151,6 +2242,7 @@ struct __db_bt_stat { /* SHARED */
 	u_int32_t bt_pagecnt;		/* Page count. */
 	u_int32_t bt_pagesize;		/* Page size. */
 	u_int32_t bt_minkey;		/* Minkey value. */
+	u_int32_t bt_nblobs;		/* Number of blobs. */
 	u_int32_t bt_re_len;		/* Fixed-length record length. */
 	u_int32_t bt_re_pad;		/* Fixed-length record pad. */
 	u_int32_t bt_levels;		/* Tree levels. */
@@ -2179,7 +2271,7 @@ struct __db_compact {
 	u_int32_t	compact_deadlock;	/* Number of deadlocks. */
 	db_pgno_t	compact_pages_truncated; /* Pages truncated to OS. */
 	/* Internal. */
-	db_pgno_t	compact_truncate;	/* Page number for truncation */
+	db_pgno_t	compact_truncate;	/* Exchange pages above here. */
 };
 
 /* Hash statistics structure. */
@@ -2189,6 +2281,7 @@ struct __db_h_stat { /* SHARED */
 	u_int32_t hash_metaflags;	/* Metadata flags. */
 	u_int32_t hash_nkeys;		/* Number of unique keys. */
 	u_int32_t hash_ndata;		/* Number of data items. */
+	u_int32_t hash_nblobs;		/* Number of blobs. */
 	u_int32_t hash_pagecnt;		/* Page count. */
 	u_int32_t hash_pagesize;	/* Page size. */
 	u_int32_t hash_ffactor;		/* Fill factor specified at create. */
@@ -2208,6 +2301,7 @@ struct __db_heap_stat { /* SHARED */
 	u_int32_t heap_magic;		/* Magic number. */
 	u_int32_t heap_version;		/* Version number. */
 	u_int32_t heap_metaflags;	/* Metadata flags. */
+	u_int32_t heap_nblobs;		/* Number of blobs. */
 	u_int32_t heap_nrecs;		/* Number of records. */
 	u_int32_t heap_pagecnt;		/* Page count. */
 	u_int32_t heap_pagesize;	/* Page size. */
@@ -2267,21 +2361,15 @@ typedef enum {
  * Backup configuration types.
  */
 typedef enum {
-	DB_BACKUP_READ_COUNT = 1,
-	DB_BACKUP_READ_SLEEP = 2,
-	DB_BACKUP_SIZE = 3,
-	DB_BACKUP_WRITE_DIRECT = 4
+	DB_BACKUP_READ_COUNT=1,
+	DB_BACKUP_READ_SLEEP=2,
+	DB_BACKUP_SIZE=3,
+	DB_BACKUP_WRITE_DIRECT=4
 } DB_BACKUP_CONFIG;
 
 struct __db_env {
 	ENV *env;			/* Linked ENV structure */
 
-	/*
-	 * The DB_ENV structure can be used concurrently, so field access is
-	 * protected.
-	 */
-	db_mutex_t mtx_db_env;		/* DB_ENV structure mutex */
-
 					/* Error message callback */
 	void (*db_errcall) __P((const DB_ENV *, const char *, const char *));
 	FILE		*db_errfile;	/* Error message file stream */
@@ -2304,6 +2392,7 @@ struct __db_env {
 	char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *));
 
 	/* Application specified paths */
+	char	*db_blob_dir;		/* Blob file directory */
 	char	*db_log_dir;		/* Database log file directory */
 	char	*db_md_dir;		/* Persistent metadata directory */
 	char	*db_tmp_dir;		/* Database tmp file directory */
@@ -2327,6 +2416,8 @@ struct __db_env {
 
 	u_int32_t	verbose;	/* DB_VERB_XXX flags */
 
+	u_int32_t	blob_threshold;	/* Blob threshold record size */
+
 	/* Mutex configuration */
 	u_int32_t	mutex_align;	/* Mutex alignment */
 	u_int32_t	mutex_cnt;	/* Number of mutexes to configure */
@@ -2395,6 +2486,11 @@ struct __db_env {
 					 * build settings.
 					 */
         db_timeout_t	envreg_timeout; /* DB_REGISTER wait timeout */
+	/*
+	 * When failchk broadcasting is active, any wait for a mutex will wake
+	 * up this frequently in order to check whether the mutex has died.
+	 */
+	db_timeout_t	mutex_failchk_timeout;
 
 #define	DB_ENV_AUTO_COMMIT	0x00000001 /* DB_AUTO_COMMIT */
 #define	DB_ENV_CDB_ALLDB	0x00000002 /* CDB environment wide locking */
@@ -2414,8 +2510,8 @@ struct __db_env {
 #define	DB_ENV_TXN_SNAPSHOT	0x00008000 /* DB_TXN_SNAPSHOT set */
 #define	DB_ENV_TXN_WRITE_NOSYNC	0x00010000 /* DB_TXN_WRITE_NOSYNC set */
 #define	DB_ENV_YIELDCPU		0x00020000 /* DB_YIELDCPU set */
-#define DB_ENV_HOTBACKUP	0x00040000 /* DB_HOTBACKUP_IN_PROGRESS set */
-#define DB_ENV_NOFLUSH		0x00080000 /* DB_NOFLUSH set */
+#define	DB_ENV_HOTBACKUP	0x00040000 /* DB_HOTBACKUP_IN_PROGRESS set */
+#define	DB_ENV_NOFLUSH		0x00080000 /* DB_NOFLUSH set */
 	u_int32_t flags;
 
 	/* DB_ENV PUBLIC HANDLE LIST BEGIN */
@@ -2436,6 +2532,8 @@ struct __db_env {
 		void *(**)(void *, size_t), void (**)(void *)));
 	int  (*get_app_dispatch)
 		__P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+	int  (*get_blob_dir) __P((DB_ENV *, const char **));
+	int  (*get_blob_threshold) __P((DB_ENV*, u_int32_t *));
 	int  (*get_cache_max) __P((DB_ENV *, u_int32_t *, u_int32_t *));
 	int  (*get_cachesize) __P((DB_ENV *, u_int32_t *, u_int32_t *, int *));
 	int  (*get_create_dir) __P((DB_ENV *, const char **));
@@ -2451,8 +2549,8 @@ struct __db_env {
 		void (**)(const DB_ENV *, const char *, const char *)));
 	void (*get_errfile) __P((DB_ENV *, FILE **));
 	void (*get_errpfx) __P((DB_ENV *, const char **));
-	int  (*get_flags) __P((DB_ENV *, u_int32_t *));
 	int  (*get_feedback) __P((DB_ENV *, void (**)(DB_ENV *, int, int)));
+	int  (*get_flags) __P((DB_ENV *, u_int32_t *));
 	int  (*get_home) __P((DB_ENV *, const char **));
 	int  (*get_intermediate_dir_mode) __P((DB_ENV *, const char **));
 	int  (*get_isalive) __P((DB_ENV *,
@@ -2568,17 +2666,23 @@ struct __db_env {
 	int  (*rep_set_timeout) __P((DB_ENV *, int, db_timeout_t));
 	int  (*rep_set_transport) __P((DB_ENV *, int, int (*)(DB_ENV *,
 		const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)));
+	int  (*rep_set_view) __P((DB_ENV *, int (*)(DB_ENV *,
+		const char *, int *, u_int32_t)));
 	int  (*rep_start) __P((DB_ENV *, DBT *, u_int32_t));
 	int  (*rep_stat) __P((DB_ENV *, DB_REP_STAT **, u_int32_t));
 	int  (*rep_stat_print) __P((DB_ENV *, u_int32_t));
 	int  (*rep_sync) __P((DB_ENV *, u_int32_t));
 	int  (*repmgr_channel) __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t));
 	int  (*repmgr_get_ack_policy) __P((DB_ENV *, int *));
+	int  (*repmgr_get_incoming_queue_max)
+		__P((DB_ENV *, u_int32_t *, u_int32_t *));
 	int  (*repmgr_local_site) __P((DB_ENV *, DB_SITE **));
 	int  (*repmgr_msg_dispatch) __P((DB_ENV *,
 		void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t),
 		u_int32_t));
 	int  (*repmgr_set_ack_policy) __P((DB_ENV *, int));
+	int  (*repmgr_set_incoming_queue_max)
+		__P((DB_ENV *, u_int32_t, u_int32_t));
 	int  (*repmgr_site)
 		__P((DB_ENV *, const char *, u_int, DB_SITE**, u_int32_t));
 	int  (*repmgr_site_by_eid) __P((DB_ENV *, int, DB_SITE**));
@@ -2590,6 +2694,8 @@ struct __db_env {
 		void *(*)(void *, size_t), void (*)(void *)));
 	int  (*set_app_dispatch)
 		__P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+	int  (*set_blob_dir) __P((DB_ENV *, const char *));
+	int  (*set_blob_threshold) __P((DB_ENV *, u_int32_t, u_int32_t));
 	int  (*set_cache_max) __P((DB_ENV *, u_int32_t, u_int32_t));
 	int  (*set_cachesize) __P((DB_ENV *, u_int32_t, u_int32_t, int));
 	int  (*set_create_dir) __P((DB_ENV *, const char *));
@@ -2662,8 +2768,8 @@ struct __db_env {
 	/* DB_ENV PUBLIC HANDLE LIST END */
 
 	/* DB_ENV PRIVATE HANDLE LIST BEGIN */
-	int  (*prdbt) __P((DBT *, int,
-		const char *, void *, int (*)(void *, const void *), int, int));
+	int  (*prdbt) __P((DBT *, int, const char *, void *,
+		int (*)(void *, const void *), int, int, int));
 	/* DB_ENV PRIVATE HANDLE LIST END */
 };
 
diff --git a/src/dbinc/db_185.in b/src/dbinc/db_185.in
index 43735344..3aef2eca 100644
--- a/src/dbinc/db_185.in
+++ b/src/dbinc/db_185.in
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
diff --git a/src/dbinc/db_am.h b/src/dbinc/db_am.h
index f34578c4..2b5c49d2 100644
--- a/src/dbinc/db_am.h
+++ b/src/dbinc/db_am.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -199,12 +199,16 @@ struct __db_foreign_info {
 #define	DB_IS_PRIMARY(dbp) (LIST_FIRST(&dbp->s_secondaries) != NULL)
 /*
  * A database should be required to be readonly if it's been explicitly
- * specified as such or if we're a client in a replicated environment
- * and the user did not specify DB_TXN_NOT_DURABLE.
+ * specified as such, if we're a client in a replicated environment
+ * and the user did not specify DB_TXN_NOT_DURABLE, or if we're a master
+ * in a replicated environment and the REP_F_READONLY_MASTER flag has been
+ * set in preparation for a preferred master takeover.
  */
 #define	DB_IS_READONLY(dbp)						\
     (F_ISSET(dbp, DB_AM_RDONLY) ||					\
-    (IS_REP_CLIENT((dbp)->env) && !F_ISSET((dbp), DB_AM_NOT_DURABLE)))
+    (IS_REP_CLIENT((dbp)->env) && !F_ISSET((dbp), DB_AM_NOT_DURABLE))	\
+    || (IS_REP_MASTER((dbp)->env) &&					\
+    F_ISSET((dbp)->env->rep_handle->region, REP_F_READONLY_MASTER)))
 
 #ifdef HAVE_COMPRESSION
 /*
diff --git a/src/dbinc/db_cxx.in b/src/dbinc/db_cxx.in
index 84fc0f88..5b29f7e8 100644
--- a/src/dbinc/db_cxx.in
+++ b/src/dbinc/db_cxx.in
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -76,6 +76,7 @@ class DbMpoolFile;                               // forward
 class DbPreplist;                                // forward
 class DbSequence;                                // forward
 class DbSite;                                    // forward
+class DbStream;                                  // forward
 class Dbt;                                       // forward
 class DbTxn;                                     // forward
 
@@ -159,13 +160,13 @@ extern "C" {
 	typedef void (*db_free_fcn_type)
 		(void *);
 	typedef int (*bt_compare_fcn_type)          /*C++ version available*/
-		(DB *, const DBT *, const DBT *);
+		(DB *, const DBT *, const DBT *, size_t *);
 	typedef size_t (*bt_prefix_fcn_type)        /*C++ version available*/
 		(DB *, const DBT *, const DBT *);
 	typedef int (*dup_compare_fcn_type)         /*C++ version available*/
-		(DB *, const DBT *, const DBT *);
+		(DB *, const DBT *, const DBT *, size_t *);
 	typedef int (*h_compare_fcn_type)          /*C++ version available*/
-		(DB *, const DBT *, const DBT *);
+		(DB *, const DBT *, const DBT *, size_t *);
 	typedef u_int32_t (*h_hash_fcn_type)        /*C++ version available*/
 		(DB *, const void *, u_int32_t);
 	typedef int (*pgin_fcn_type)
@@ -204,7 +205,10 @@ public:
 	virtual int get_alloc(
 	    db_malloc_fcn_type *, db_realloc_fcn_type *, db_free_fcn_type *);
 	virtual int get_append_recno(int (**)(Db *, Dbt *, db_recno_t));
-	virtual int get_bt_compare(int (**)(Db *, const Dbt *, const Dbt *));
+	virtual int get_blob_dir(const char **);
+	virtual int get_blob_threshold(u_int32_t *);
+	virtual int get_bt_compare(
+	    int (**)(Db *, const Dbt *, const Dbt *, size_t *));
 	virtual int get_bt_compress(
 	    int (**)(
 	    Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *),
@@ -215,7 +219,8 @@ public:
 	virtual int get_cachesize(u_int32_t *, u_int32_t *, int *);
 	virtual int get_create_dir(const char **);
 	virtual int get_dbname(const char **, const char **);
-	virtual int get_dup_compare(int (**)(Db *, const Dbt *, const Dbt *));
+	virtual int get_dup_compare(
+	    int (**)(Db *, const Dbt *, const Dbt *, size_t *));
 	virtual int get_encrypt_flags(u_int32_t *);
 	virtual void get_errcall(
 	    void (**)(const DbEnv *, const char *, const char *));
@@ -225,7 +230,8 @@ public:
 	virtual int get_flags(u_int32_t *);
 	virtual int get_heapsize(u_int32_t *, u_int32_t *);
 	virtual int get_heap_regionsize(u_int32_t *);
-	virtual int get_h_compare(int (**)(Db *, const Dbt *, const Dbt *));
+	virtual int get_h_compare(
+	    int (**)(Db *, const Dbt *, const Dbt *, size_t *));
 	virtual int get_h_ffactor(u_int32_t *);
 	virtual int get_h_hash(u_int32_t (**)(Db *, const void *, u_int32_t));
 	virtual int get_h_nelem(u_int32_t *);
@@ -261,8 +267,11 @@ public:
 	    db_malloc_fcn_type, db_realloc_fcn_type, db_free_fcn_type);
 	virtual void set_app_private(void *);
 	virtual int set_append_recno(int (*)(Db *, Dbt *, db_recno_t));
+	virtual int set_blob_dir(const char *);
+	virtual int set_blob_threshold(u_int32_t, u_int32_t);
 	virtual int set_bt_compare(bt_compare_fcn_type); /*deprecated*/
-	virtual int set_bt_compare(int (*)(Db *, const Dbt *, const Dbt *));
+	virtual int set_bt_compare(
+	    int (*)(Db *, const Dbt *, const Dbt *, size_t *));
 	virtual int set_bt_compress(
 	    int (*)
 	    (Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *),
@@ -273,7 +282,8 @@ public:
 	virtual int set_cachesize(u_int32_t, u_int32_t, int);
 	virtual int set_create_dir(const char *);
 	virtual int set_dup_compare(dup_compare_fcn_type); /*deprecated*/
-	virtual int set_dup_compare(int (*)(Db *, const Dbt *, const Dbt *));
+	virtual int set_dup_compare(
+	    int (*)(Db *, const Dbt *, const Dbt *, size_t *));
 	virtual int set_encrypt(const char *, u_int32_t);
 	virtual void set_errcall(
 	    void (*)(const DbEnv *, const char *, const char *));
@@ -284,7 +294,8 @@ public:
 	virtual int set_heapsize(u_int32_t, u_int32_t);
 	virtual int set_heap_regionsize(u_int32_t);
 	virtual int set_h_compare(h_compare_fcn_type); /*deprecated*/
-	virtual int set_h_compare(int (*)(Db *, const Dbt *, const Dbt *));
+	virtual int set_h_compare(
+	    int (*)(Db *, const Dbt *, const Dbt *, size_t *));
 	virtual int set_h_ffactor(u_int32_t);
 	virtual int set_h_hash(h_hash_fcn_type); /*deprecated*/
 	virtual int set_h_hash(u_int32_t (*)(Db *, const void *, u_int32_t));
@@ -383,16 +394,16 @@ public:
 	int (*associate_callback_)(Db *, const Dbt *, const Dbt *, Dbt *);
 	int (*associate_foreign_callback_)
 	    (Db *, const Dbt *, Dbt *, const Dbt *, int *);
-	int (*bt_compare_callback_)(Db *, const Dbt *, const Dbt *);
+	int (*bt_compare_callback_)(Db *, const Dbt *, const Dbt *, size_t *);
 	int (*bt_compress_callback_)(
 	    Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *);
 	int (*bt_decompress_callback_)(
 	    Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *);
 	size_t (*bt_prefix_callback_)(Db *, const Dbt *, const Dbt *);
 	u_int32_t (*db_partition_callback_)(Db *, Dbt *);
-	int (*dup_compare_callback_)(Db *, const Dbt *, const Dbt *);
+	int (*dup_compare_callback_)(Db *, const Dbt *, const Dbt *, size_t *);
 	void (*feedback_callback_)(Db *, int, int);
-	int (*h_compare_callback_)(Db *, const Dbt *, const Dbt *);
+	int (*h_compare_callback_)(Db *, const Dbt *, const Dbt *, size_t *);
 	u_int32_t (*h_hash_callback_)(Db *, const void *, u_int32_t);
 };
 
@@ -407,6 +418,7 @@ public:
 	int close();
 	int cmp(Dbc *other_csr, int *result, u_int32_t flags);
 	int count(db_recno_t *countp, u_int32_t flags);
+	int db_stream(DbStream **dbsp, u_int32_t flags);
 	int del(u_int32_t flags);
 	int dup(Dbc** cursorp, u_int32_t flags);
 	int get(Dbt* key, Dbt *data, u_int32_t flags);
@@ -527,6 +539,10 @@ public:
 	    int (*)(DbEnv *, const char *, void *));
 	virtual int get_backup_config(DB_BACKUP_CONFIG, u_int32_t *);
 	virtual int set_backup_config(DB_BACKUP_CONFIG, u_int32_t);
+	virtual int get_blob_dir(const char **);
+	virtual int set_blob_dir(const char *);
+	virtual int get_blob_threshold(u_int32_t *);
+	virtual int set_blob_threshold(u_int32_t, u_int32_t);
 	virtual int get_cachesize(u_int32_t *, u_int32_t *, int *);
 	virtual int set_cachesize(u_int32_t, u_int32_t, int);
 	virtual int get_cache_max(u_int32_t *, u_int32_t *);
@@ -761,10 +777,16 @@ public:
 	virtual int rep_set_priority(u_int32_t priority);
 	virtual int rep_get_timeout(int which, db_timeout_t *timeout);
 	virtual int rep_set_timeout(int which, db_timeout_t timeout);
+	virtual int rep_set_view(int (*)(DbEnv *,
+	    const char *, int *, u_int32_t));
 	virtual int repmgr_channel(int eid, DbChannel **channel,
 	    u_int32_t flags);
 	virtual int repmgr_get_ack_policy(int *policy);
 	virtual int repmgr_set_ack_policy(int policy);
+	virtual int repmgr_get_incoming_queue_max(u_int32_t *gbytesp,
+	    u_int32_t *bytesp);
+	virtual int repmgr_set_incoming_queue_max(u_int32_t gbytes,
+	    u_int32_t bytes);
 	virtual int repmgr_local_site(DbSite **site);
 	virtual int repmgr_msg_dispatch(void (*) (DbEnv *,
 	    DbChannel *, Dbt *, u_int32_t, u_int32_t), u_int32_t flags);
@@ -824,6 +846,8 @@ public:
 	static int _backup_write_intercept(DB_ENV *dbenv, u_int32_t off_gbytes,
 	    u_int32_t off_bytes, u_int32_t size, u_int8_t *buf, void *handle);
 	static void _paniccall_intercept(DB_ENV *dbenv, int errval);
+	static int _partial_rep_intercept(DB_ENV *dbenv,
+	    const char *name, int *result, u_int32_t flags);
 	static void _feedback_intercept(DB_ENV *dbenv, int opcode, int pct);
 	static void  _event_func_intercept(DB_ENV *dbenv, u_int32_t, void *);
 	static int _isalive_intercept(DB_ENV *dbenv, pid_t pid,
@@ -872,6 +896,7 @@ private:
 	void (*feedback_callback_)(DbEnv *, int, int);
 	void (*message_callback_)(const DbEnv *, const char *);
 	void (*paniccall_callback_)(DbEnv *, int);
+	int (*partial_rep_callback_)(DbEnv *, const char *, int *, u_int32_t);
 	void (*event_func_callback_)(DbEnv *, u_int32_t, void *);
 	int (*rep_send_callback_)(DbEnv *, const Dbt *, const Dbt *,
 	    const DbLsn *, int, u_int32_t);
@@ -1057,9 +1082,9 @@ public:
 	int stat(DB_SEQUENCE_STAT **sp, u_int32_t flags);
 	int stat_print(u_int32_t flags);
 
-	int get(DbTxn *txnid, int32_t delta, db_seq_t *retp, u_int32_t flags);
-	int get_cachesize(int32_t *sizep);
-	int set_cachesize(int32_t size);
+	int get(DbTxn *txnid, u_int32_t delta, db_seq_t *retp, u_int32_t flags);
+	int get_cachesize(u_int32_t *sizep);
+	int set_cachesize(u_int32_t size);
 	int get_flags(u_int32_t *flagsp);
 	int set_flags(u_int32_t flags);
 	int get_range(db_seq_t *minp, db_seq_t *maxp);
@@ -1137,6 +1162,34 @@ private:
 };
 
 //
+// DbStream
+//
+class _exported DbStream : protected DB_STREAM
+{
+	friend class Dbc;
+
+public:
+	int close(u_int32_t flags);
+	int read(Dbt *data, db_off_t offset, u_int32_t size, u_int32_t flags);
+	int size(db_off_t *size, u_int32_t flags);
+	int write(Dbt *data, db_off_t offset, u_int32_t flags);
+
+private:
+	// No data is permitted in this class (see comment at top)
+
+	// Note: use Dbc::db_stream() to get pointers to a DbStream,
+	// and call DbStream::close() rather than delete to release them.
+	//
+	DbStream();
+	~DbStream();
+
+	// no copying
+	DbStream(const DbStream &);
+	DbStream &operator = (const DbStream &);
+
+};
+
+//
 // Transaction
 //
 class _exported DbTxn
@@ -1245,6 +1298,7 @@ class _exported Dbt : private DBT
 	friend class DbEnv;
 	friend class DbLogc;
 	friend class DbSequence;
+	friend class DbStream;
 
 public:
 	// key/data
diff --git a/src/dbinc/db_dispatch.h b/src/dbinc/db_dispatch.h
index b6382871..b3aedab1 100644
--- a/src/dbinc/db_dispatch.h
+++ b/src/dbinc/db_dispatch.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
diff --git a/src/dbinc/db_int.in b/src/dbinc/db_int.in
index 42439107..593deef6 100644
--- a/src/dbinc/db_int.in
+++ b/src/dbinc/db_int.in
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -73,6 +73,17 @@
 
 #endif /* !HAVE_SYSTEM_INCLUDE_FILES */
 
+/*
+ * The Windows compiler needs to be told about structures that are available
+ * outside a dll.
+ */
+#if defined(DB_WIN32) && defined(_MSC_VER) && \
+    !defined(DB_CREATE_DLL) && !defined(_LIB)
+#define	__DB_IMPORT __declspec(dllimport)
+#else
+#define	__DB_IMPORT
+#endif
+
 #ifdef DB_WIN32
 #include "dbinc/win_db.h"
 #endif
@@ -88,22 +99,12 @@
 #include "dbinc/queue.h"
 #include "dbinc/shqueue.h"
 #include "dbinc/perfmon.h"
+#include "dbinc/clock.h"
 
 #if defined(__cplusplus)
 extern "C" {
 #endif
 
-/*
- * The Windows compiler needs to be told about structures that are available
- * outside a dll.
- */
-#if defined(DB_WIN32) && defined(_MSC_VER) && \
-    !defined(DB_CREATE_DLL) && !defined(_LIB)
-#define	__DB_IMPORT __declspec(dllimport)
-#else
-#define	__DB_IMPORT
-#endif
-
 /*******************************************************
  * Forward structure declarations.
  *******************************************************/
@@ -366,22 +367,27 @@ typedef struct __fn {
 /*
  * Structure used for callback message aggregation.
  *
- * Display values in XXX_stat_print calls.
+ * DB_MSGBUF_FLUSH displays values in XXX_stat_print calls.
+ * DB_MSGBUF_REP_FLUSH displays replication system messages.
  */
 typedef struct __db_msgbuf {
 	char *buf;			/* Heap allocated buffer. */
 	char *cur;			/* Current end of message. */
 	size_t len;			/* Allocated length of buffer. */
+	int flags;
 } DB_MSGBUF;
+#define DB_MSGBUF_PREALLOCATED		0x0001
+
 #define	DB_MSGBUF_INIT(a) do {						\
 	(a)->buf = (a)->cur = NULL;					\
-	(a)->len = 0;							\
+	(a)->len = (a)->flags = 0;					\
 } while (0)
 #define	DB_MSGBUF_FLUSH(env, a) do {					\
 	if ((a)->buf != NULL) {						\
 		if ((a)->cur != (a)->buf)				\
 			__db_msg(env, "%s", (a)->buf);			\
-		__os_free(env, (a)->buf);				\
+		if (!F_ISSET((a), DB_MSGBUF_PREALLOCATED))		\
+			__os_free(env, (a)->buf);			\
 		DB_MSGBUF_INIT(a);					\
 	}								\
 } while (0)
@@ -392,18 +398,14 @@ typedef struct __db_msgbuf {
 		if (regular_msg)					\
 			DB_MSGBUF_FLUSH(env, a);			\
 		else {							\
-			__os_free(env, (a)->buf);			\
+			if (!F_ISSET((a), DB_MSGBUF_PREALLOCATED))	\
+				__os_free(env, (a)->buf);		\
 			DB_MSGBUF_INIT(a);				\
 		}							\
 	}								\
 } while (0)
-#define	STAT_FMT(msg, fmt, type, v) do {				\
-	DB_MSGBUF __mb;							\
-	DB_MSGBUF_INIT(&__mb);						\
-	__db_msgadd(env, &__mb, fmt, (type)(v));			\
-	__db_msgadd(env, &__mb, "\t%s", msg);				\
-	DB_MSGBUF_FLUSH(env, &__mb);					\
-} while (0)
+#define	STAT_FMT(msg, fmt, type, v)					\
+	__db_msg(env, fmt "\t%s", (type)(v), msg)
 #define	STAT_HEX(msg, v)						\
 	__db_msg(env, "%#lx\t%s", (u_long)(v), msg)
 #define	STAT_ISSET(msg, p)						\
@@ -441,25 +443,21 @@ typedef struct __db_msgbuf {
  *
  * Error message IDs are automatically assigned by dist/s_message_id script.
  */
-#ifdef HAVE_LOCALIZATION
-#define _(msg)	msg	/* Replace with localization function. */
-#else
-#define _(msg)	msg
-#endif
-
 #ifdef HAVE_STRIPPED_MESSAGES
 #define DB_STR_C(msg, fmt)	fmt
 #else
-#define DB_STR_C(msg, fmt)	_(msg)
+#define DB_STR_C(msg, fmt)	msg
 #endif
 
-#define DB_MSGID(id)		"BDB" id
-
-#define DB_STR(id, msg)		DB_MSGID(id) " " DB_STR_C(msg, "")
-
-#define DB_STR_A(id, msg, fmt)	DB_MSGID(id) " " DB_STR_C(msg, fmt)
+#ifdef HAVE_LOCALIZATION
+#define _(msg)	(msg)	/* Replace with localization function. */
+#else
+#define _(msg)	msg
+#endif
 
-#define DB_STR_P(msg)		_(msg)
+#define DB_STR(id, msg)			_("BDB" id " " DB_STR_C(msg, ""))
+#define DB_STR_A(id, msg, fmt)	_("BDB" id " " DB_STR_C(msg, fmt))
+#define DB_STR_P(msg)			_(msg)
 
 /*
  * There are quite a few places in Berkeley DB where we want to initialize
@@ -542,6 +540,7 @@ typedef struct __db_msgbuf {
 /* Type passed to __db_appname(). */
 typedef enum {
 	DB_APP_NONE=0,			/* No type (region). */
+	DB_APP_BLOB,			/* Blob file. */
 	DB_APP_DATA,			/* Data file. */
 	DB_APP_LOG,			/* Log file. */
 	DB_APP_META,			/* Persistent metadata file. */
@@ -612,8 +611,13 @@ typedef enum {
 	if (F_ISSET((env), ENV_OPEN_CALLED))				\
 		ENV_REQUIRES_CONFIG(env, handle, i, flags)
 
+/*
+ * The ENV_ENTER and ENV_LEAVE macros announce to other threads that
+ * the current thread is entering or leaving the BDB api.
+ */
 #define	ENV_ENTER_RET(env, ip, ret) do {				\
 	ret = 0;							\
+	DISCARD_HISTORY(env);						\
 	PANIC_CHECK_RET(env, ret);					\
  	if (ret == 0) {							\
 		if ((env)->thr_hashtab == NULL)				\
@@ -631,6 +635,10 @@ typedef enum {
 		return (__ret);						\
 } while (0)
 
+/*
+ * Publicize the current thread's intention to run failchk. This invokes
+ * DB_ENV->is_alive() in the mutex code, to avoid hanging on dead processes.
+ */
 #define	FAILCHK_THREAD(env, ip) do {					\
 	if ((ip) != NULL)						\
 		(ip)->dbth_state = THREAD_FAILCHK;			\
@@ -638,20 +646,15 @@ typedef enum {
 
 #define	ENV_GET_THREAD_INFO(env, ip) ENV_ENTER(env, ip)
 
-#ifdef DIAGNOSTIC
 #define	ENV_LEAVE(env, ip) do {						\
-	if ((ip) != NULL) {						\
-		DB_ASSERT(env, ((ip)->dbth_state == THREAD_ACTIVE  ||	\
-		    (ip)->dbth_state == THREAD_FAILCHK));		\
+	if ((ip) != NULL) {	\
+		DB_ASSERT((env), (ip)->dbth_state == THREAD_ACTIVE  ||	\
+		    (ip)->dbth_state == THREAD_FAILCHK);		\
 		(ip)->dbth_state = THREAD_OUT;				\
 	}								\
 } while (0)
-#else
-#define	ENV_LEAVE(env, ip) do {						\
-	if ((ip) != NULL)						\
-		(ip)->dbth_state = THREAD_OUT;				\
-} while (0)
-#endif
+
+
 #ifdef DIAGNOSTIC
 #define	CHECK_THREAD(env) do {						\
 	if ((env)->thr_hashtab != NULL)					\
@@ -688,6 +691,23 @@ typedef struct __pin_list {
 } PIN_LIST;
 #define	PINMAX 4
 
+typedef enum {
+	MUTEX_ACTION_UNLOCKED=0,
+	MUTEX_ACTION_INTEND_SHARE,	/* Thread is attempting a read-lock. */
+	MUTEX_ACTION_SHARED		/* Thread has gotten a read lock. */
+} MUTEX_ACTION;
+
+typedef struct __mutex_state {	/* SHARED */
+	db_mutex_t	mutex;
+	MUTEX_ACTION	action;
+#ifdef DIAGNOSTIC
+	db_timespec	when;
+#endif
+} MUTEX_STATE;
+
+#define MUTEX_STATE_MAX 10	/* It only needs enough for shared latches. */
+
+
 struct __db_thread_info { /* SHARED */
 	pid_t		dbth_pid;
 	db_threadid_t	dbth_tid;
@@ -707,11 +727,25 @@ struct __db_thread_info { /* SHARED */
 	u_int16_t	dbth_pinmax;	/* Number of slots allocated. */
 	roff_t		dbth_pinlist;	/* List of pins. */
 	PIN_LIST	dbth_pinarray[PINMAX];	/* Initial array of slots. */
+
+	/*
+	 * While thread tracking is active this caches one of the lockers
+	 * created by each thread. This locker remains allocated, with an
+	 * invalid id, even after the locker id is freed.
+	 */
+	roff_t		dbth_local_locker;
+	/*
+	 * Each latch shared by this thread has an entry here.  Exclusive
+	 * ownership, for both latches and mutexes, are in the DB_MUTEX.
+	 */
+	MUTEX_STATE	dbth_latches[MUTEX_STATE_MAX];
 #ifdef DIAGNOSTIC
 	roff_t		dbth_locker;	/* Current locker for this thread. */
 	u_int32_t	dbth_check_off;	/* Count of number of LOCK_OFF calls. */
 #endif
+	db_timespec	dbth_failtime;	/* Time when its crash was detected. */
 };
+
 #ifdef DIAGNOSTIC
 #define LOCK_CHECK_OFF(ip) if ((ip) != NULL)				\
 	(ip)->dbth_check_off++
@@ -729,7 +763,7 @@ struct __db_thread_info { /* SHARED */
 #define LOCK_CHECK(dbc, pgno, mode)	NOP_STATEMENT
 #endif
 
-typedef struct __env_thread_info {
+typedef struct __env_thread_info { /* SHARED */
 	u_int32_t	thr_count;
 	u_int32_t	thr_init;
 	u_int32_t	thr_max;
@@ -803,6 +837,11 @@ struct __env {
 #define ENV_DEF_DATA_LEN		100
 	u_int32_t data_len;		/* Data length in __db_prbytes. */
 
+	/* Registered processes */
+	size_t	num_active_pids;	/* number of entries in active_pids */
+	size_t	size_active_pids;	/* allocated size of active_pids */
+	pid_t	*active_pids;		/* array of active pids */
+
 	/* Thread tracking */
 	u_int32_t	 thr_nbucket;	/* Number of hash buckets */
 	DB_HASHTAB	*thr_hashtab;	/* Hash table of DB_THREAD_INFO */
@@ -866,6 +905,7 @@ struct __env {
 #define	DB_TEST_PREOPEN		 10	/* before __os_open */
 #define	DB_TEST_REPMGR_PERM	 11	/* repmgr perm/archiving tests */
 #define	DB_TEST_SUBDB_LOCKS	 12	/* subdb locking tests */
+#define	DB_TEST_REPMGR_HEARTBEAT 13	/* repmgr stop sending heartbeats */
 	int	test_abort;		/* Abort value for testing */
 	int	test_check;		/* Checkpoint value for testing */
 	int	test_copy;		/* Copy value for testing */
@@ -881,7 +921,9 @@ struct __env {
 #define	ENV_REF_COUNTED		0x00000100 /* Region references this handle */
 #define	ENV_SYSTEM_MEM		0x00000200 /* DB_SYSTEM_MEM set */
 #define	ENV_THREAD		0x00000400 /* DB_THREAD set */
-#define ENV_FORCE_TXN_BULK	0x00000800 /* Txns use bulk mode-for testing */
+#define	ENV_FORCE_TXN_BULK	0x00000800 /* Txns use bulk mode-for testing */
+#define	ENV_REMEMBER_PANIC	0x00001000 /* Panic was on during cleanup. */
+#define	ENV_FORCESYNCENV	0x00002000 /* Force msync on closing. */
 	u_int32_t flags;
 };
 
@@ -1106,7 +1148,6 @@ typedef struct __dbpginfo {
 @db_int_def@
 
 #include "dbinc/globals.h"
-#include "dbinc/clock.h"
 #include "dbinc/debug.h"
 #include "dbinc/region.h"
 #include "dbinc_auto/env_ext.h"
@@ -1118,6 +1159,7 @@ typedef struct __dbpginfo {
 #include "dbinc/os.h"
 #include "dbinc_auto/clib_ext.h"
 #include "dbinc_auto/common_ext.h"
+#include "dbinc_auto/blob_ext.h"
 
 /*******************************************************
  * Remaining Log.
diff --git a/src/dbinc/db_join.h b/src/dbinc/db_join.h
index aecf059a..8f22adcb 100644
--- a/src/dbinc/db_join.h
+++ b/src/dbinc/db_join.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/db_page.h b/src/dbinc/db_page.h
index 2d4de2e5..4694c4cf 100644
--- a/src/dbinc/db_page.h
+++ b/src/dbinc/db_page.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -93,6 +93,7 @@ typedef struct _dbmeta33 {
 	u_int8_t  uid[DB_FILE_ID_LEN];
 } DBMETA33, DBMETA;
 
+
 /************************************************************************
  BTREE METADATA PAGE LAYOUT
  ************************************************************************/
@@ -113,7 +114,13 @@ typedef struct _btmeta33 {
 	u_int32_t re_len;	/* 80-83: Recno: fixed-length record length. */
 	u_int32_t re_pad;	/* 84-87: Recno: fixed-length record pad. */
 	u_int32_t root;		/* 88-91: Root page. */
-	u_int32_t unused2[92];	/* 92-459: Unused space. */
+	u_int32_t blob_threshold;
+				/* 92-95: Minimum blob file size. */
+	u_int32_t blob_file_lo;	/* 96-99: Blob file dir id lo. */
+	u_int32_t blob_file_hi;	/* 100-103: Blob file dir id hi. */
+	u_int32_t blob_sdb_lo;	/* 104-107: Blob sdb dir id lo */
+	u_int32_t blob_sdb_hi;	/* 108-111: Blob sdb dir id hi */
+	u_int32_t unused2[87];	/* 112-459: Unused space. */
 	u_int32_t crypto_magic;		/* 460-463: Crypto magic number */
 	u_int32_t trash[3];		/* 464-475: Trash space - Do not use */
 	u_int8_t iv[DB_IV_BYTES];	/* 476-495: Crypto IV */
@@ -142,7 +149,13 @@ typedef struct _hashmeta33 {
 #define	NCACHED	32		/* number of spare points */
 				/* 96-223: Spare pages for overflow */
 	u_int32_t spares[NCACHED];
-	u_int32_t unused[59];	/* 224-459: Unused space */
+	u_int32_t blob_threshold;
+				/* 224-227: Minimum blob file size. */
+	u_int32_t blob_file_lo;	/* 228-231: Blob file dir id lo. */
+	u_int32_t blob_file_hi;	/* 232-235: Blob file dir id hi. */
+	u_int32_t blob_sdb_lo;	/* 236-239: Blob sdb dir id lo. */
+	u_int32_t blob_sdb_hi;	/* 240-243: Blob sdb dir id hi. */
+	u_int32_t unused[54];	/* 244-459: Unused space */
 	u_int32_t crypto_magic;	/* 460-463: Crypto magic number */
 	u_int32_t trash[3];	/* 464-475: Trash space - Do not use */
 	u_int8_t iv[DB_IV_BYTES];	/* 476-495: Crypto IV */
@@ -168,7 +181,10 @@ typedef struct _heapmeta {
 	u_int32_t gbytes;		/* 80-83: GBytes for fixed size heap. */
 	u_int32_t bytes;		/* 84-87: Bytes for fixed size heap. */
 	u_int32_t region_size;		/* 88-91: Max region size. */
-	u_int32_t unused2[92];		/* 92-459: Unused space.*/
+	u_int32_t blob_threshold;	/* 92-95: Minimum blob file size. */
+	u_int32_t blob_file_lo;		/* 96-99: Blob file dir id lo. */
+	u_int32_t blob_file_hi;		/* 100-103: Blob file dir id hi. */
+	u_int32_t unused2[89];		/* 104-459: Unused space. */
 	u_int32_t crypto_magic;		/* 460-463: Crypto magic number */
 	u_int32_t trash[3];		/* 464-475: Trash space - Do not use */
 	u_int8_t  iv[DB_IV_BYTES];	/* 476-495: Crypto IV */
@@ -371,6 +387,7 @@ typedef struct __heaphdr {
 #define HEAP_RECSPLIT 0x01 /* Heap data record is split */
 #define HEAP_RECFIRST 0x02 /* First piece of a split record */
 #define HEAP_RECLAST  0x04 /* Last piece of a split record */
+#define HEAP_RECBLOB  0x08 /* Record refers to a blob */
 	u_int8_t flags;		/* 00: Flags describing record. */
 	u_int8_t unused;	/* 01: Padding. */
 	u_int16_t size;		/* 02-03: The size of the stored data piece. */
@@ -384,8 +401,35 @@ typedef struct __heaphdrsplt {
 	u_int16_t unused;	/* 14-15: Padding. */
 } HEAPSPLITHDR;
 
+/*
+ * HEAPBLOB, the blob database record for heap.
+ * Saving bytes is not a concern for the blob record type - if too many
+ * fit onto a single page, then we're likely to introduce unnecessary
+ * contention for blobs. Using blobs implies storing large items, thus slightly
+ * more per-item overhead is acceptable.
+ * If this proves untrue, the crypto section of the record could be optional.
+ * encoding, lsn, encryption, and checksum fields are unused at the moment, but
+ * included to make adding those features easier.
+ */
+typedef struct _heapblob {
+	HEAPHDR std_hdr;		/* 00-03: The standard data header */
+	u_int8_t  encoding;		/*    04: Encoding of blob file. */
+	u_int8_t  unused[7];		/* 05-11: Padding, unused. */
+	u_int8_t  chksum[DB_MAC_KEY];	/* 12-31: Checksum */
+	u_int8_t  iv[DB_IV_BYTES];	/* 32-47: IV */
+	DB_LSN    lsn;			/* 48-55: LSN for blob file update. */
+	u_int64_t id;			/* 56-63: Blob file identifier. */
+	u_int64_t size;			/* 64-71: Blob file size. */
+	u_int64_t file_id;		/* 72-79: File directory. */
+} HEAPBLOBHDR, HEAPBLOBHDR60P1;
+
 #define HEAP_HDRSIZE(hdr) 					\
-	(F_ISSET((hdr), HEAP_RECSPLIT) ? sizeof(HEAPSPLITHDR) : sizeof(HEAPHDR))
+	(F_ISSET((hdr), HEAP_RECSPLIT) ? sizeof(HEAPSPLITHDR) :	\
+	sizeof(HEAPHDR))
+
+#define HEAPBLOBREC_SIZE		(sizeof(HEAPBLOBHDR))
+#define HEAPBLOBREC_DSIZE		(sizeof(HEAPBLOBHDR) - sizeof(HEAPHDR))
+#define HEAPBLOBREC_DATA(p)		(((u_int8_t *)p) + sizeof(HEAPHDR))
 
 #define HEAPPG_SZ(dbp)			       			\
 	(F_ISSET((dbp), DB_AM_ENCRYPT) ? HEAPPG_SEC :		\
@@ -441,12 +485,12 @@ typedef struct __heaphdrsplt {
 	
 /* Return the amount of free space on a heap data page. */
 #define HEAP_FREESPACE(dbp, p)                                  \
-	(HOFFSET(p) - HEAPPG_SZ(dbp) -				\
+	((HOFFSET(p) - HEAPPG_SZ(dbp)) -			\
 	(NUM_ENT(p) == 0 ? 0 : ((HEAP_HIGHINDX(p) + 1) * sizeof(db_indx_t))))
 
 /* The maximum amount of data that can fit on an empty heap data page. */
 #define HEAP_MAXDATASIZE(dbp)					\
-	((dbp)->pgsize - HEAPPG_SZ(dbp) - sizeof(db_indx_t))
+	(((dbp)->pgsize - HEAPPG_SZ(dbp)) - sizeof(db_indx_t))
 
 #define HEAP_FREEINDX(p)	(((HEAPPG *)p)->free_indx)
 #define HEAP_HIGHINDX(p)	(((HEAPPG *)p)->high_indx)
@@ -549,9 +593,9 @@ typedef struct _qpage {
  *	The amount of overflow data stored on each page is stored in the
  *	hf_offset field.
  *
- *	The implementation reference counts overflow items as it's possible
- *	for them to be promoted onto btree internal pages.  The reference
- *	count is stored in the entries field.
+ *	Before 4.3 the implementation reference counted overflow items, because
+ *	it was once possible for them to be promoted onto btree internal pages.
+ *	The reference count is stored in the entries field.
  */
 #define	OV_LEN(p)	(((PAGE *)p)->hf_offset)
 #define	OV_REF(p)	(((PAGE *)p)->entries)
@@ -571,6 +615,7 @@ typedef struct _qpage {
 #define	H_DUPLICATE	2	/* Duplicate key/data item. */
 #define	H_OFFPAGE	3	/* Overflow key/data item. */
 #define	H_OFFDUP	4	/* Overflow page of duplicates. */
+#define	H_BLOB		5	/* Blob file data item. */
 
 /*
  * !!!
@@ -685,6 +730,78 @@ typedef struct _hoffdup {
  */
 #define	HOFFDUP_SIZE		(sizeof(HOFFDUP))
 
+/*
+ * The fifth type is the H_BLOB, represented by the HBLOB structure.
+ * Saving bytes is not a concern for the blob record type - if too many
+ * fit onto a single page, then we're likely to introduce unnecessary
+ * contention for blobs. Using blobs implies storing large items, thus slightly
+ * more per-item overhead is acceptable.
+ * If this proves untrue, the crypto section of the record could be optional.
+ * encoding, encryption, and checksum fields are unused at the moment, but
+ * included to make adding those features easier.
+ */
+typedef struct _hblob {
+	u_int8_t  type;			/*    00: Page type and delete flag. */
+	u_int8_t  encoding;		/*    01: Encoding of blob file. */
+	u_int8_t  unused[10];		/* 02-11: Padding, unused. */
+	u_int8_t  chksum[DB_MAC_KEY];	/* 12-31: Checksum */
+	u_int8_t  iv[DB_IV_BYTES];	/* 32-47: IV */
+	u_int64_t id;			/* 48-55: Blob file identifier. */
+	u_int64_t size;			/* 56-63: Blob file size. */
+	u_int64_t file_id;		/* 64-71: File directory. */
+	u_int64_t sdb_id;		/* 72-79: Subdb that owns this blob. */
+} HBLOB, HBLOB60P1;
+
+#define	HBLOB_ID(p)	(((u_int8_t *)p) + SSZ(HBLOB, id))
+#define	HBLOB_FILE_ID(p)	(((u_int8_t *)p) + SSZ(HBLOB, file_id))
+
+/*
+ * Return an off_t version of the u_int64_t blob size.  off_t may be a 32-
+ * or 64-bit integer; on 32-bit overflow this reports an error and sets ret
+ * to EINVAL, but still stores the truncated size in o.
+ */
+#define	GET_BLOB_SIZE(e, p, o, ret)	do {				\
+	DB_ASSERT((e), sizeof(o) <= 8);					\
+	if (sizeof(o) == 8) {						\
+		(o) = (off_t)(p).size;					\
+	} else {							\
+		if ((p).size > INT_MAX) {				\
+			__db_errx((e), DB_STR("0769",			\
+			    "Blob size overflow."));			\
+			(ret) = EINVAL;					\
+		}							\
+		(o) = (int32_t)(p).size;				\
+	}								\
+} while (0)
+
+#define	SET_BLOB_FIELD(p, v, type, field)	do {			\
+	u_int64_t tmp;							\
+	tmp = (u_int64_t)(v);						\
+	memcpy((u_int8_t *)(p) + SSZ(type, field),			\
+	    &tmp, sizeof(u_int64_t));					\
+} while (0)
+
+#define	SET_BLOB_ID(p, v, type)						\
+    SET_BLOB_FIELD(p, v, type, id)
+
+#define	SET_BLOB_SIZE(p, v, type)					\
+    SET_BLOB_FIELD(p, v, type, size)
+
+#define	SET_BLOB_FILE_ID(p, v, type)					\
+    SET_BLOB_FIELD(p, v, type, file_id)
+
+#define	SET_BLOB_SDB_ID(p, v, type)					\
+    SET_BLOB_FIELD(p, v, type, sdb_id)
+
+/*
+ * Page space required to add a new HBLOB item to the page, with and
+ * without the index value.
+ */
+#define	HBLOB_SIZE		(sizeof(HBLOB))
+#define	HBLOB_DSIZE		(sizeof(HBLOB) - SSZA(HKEYDATA, data))
+#define	HBLOB_PSIZE		(HBLOB_SIZE + sizeof(db_indx_t))
+
+
 /************************************************************************
  BTREE PAGE LAYOUT
  ************************************************************************/
@@ -693,6 +810,7 @@ typedef struct _hoffdup {
 #define	B_KEYDATA	1	/* Key/data item. */
 #define	B_DUPLICATE	2	/* Duplicate key/data item. */
 #define	B_OVERFLOW	3	/* Overflow key/data item. */
+#define	B_BLOB		4	/* Blob file key/data item. */
 
 /*
  * We have to store a deleted entry flag in the page.   The reason is complex,
@@ -746,6 +864,32 @@ typedef struct _boverflow {
 	u_int32_t tlen;		/* 08-11: Total length of item. */
 } BOVERFLOW;
 
+/*
+ * The fourth type is the B_BLOB, represented by the BBLOB structure.
+ * Saving bytes is not a concern for the blob record type - if too many
+ * fit onto a single page, then we're likely to introduce unnecessary
+ * contention for blobs. Using blobs implies storing large items, thus slightly
+ * more per-item overhead is acceptable.
+ * The len field is set to BBLOB_DSIZE, so that a B_BLOB can be treated just
+ * like a B_KEYDATA for the purposes of moving items between or on a page.
+ * If this proves untrue, the crypto section of the record could be optional.
+ * encoding, lsn, encryption, and checksum fields are unused at the moment, but
+ * included to make adding those features easier.
+ */
+typedef struct _bblob {
+	db_indx_t len;			/* 00-01: BBLOB_DSIZE. */
+	u_int8_t  type;			/*    02: Page type and delete flag. */
+	u_int8_t  encoding;		/*    03: Encoding of blob file. */
+	u_int8_t  unused[8];		/* 04-11: Padding, unused. */
+	u_int8_t  chksum[DB_MAC_KEY];	/* 12-31: Checksum */
+	u_int8_t  iv[DB_IV_BYTES];	/* 32-47: IV */
+	u_int64_t id;			/* 48-55: Blob file identifier. */
+	u_int64_t size;			/* 56-63: Blob file size. */
+	u_int64_t file_id;		/* 64-71: File directory. */
+	u_int64_t sdb_id;		/* 72-79: Subdb that owns this blob. */
+} BBLOB, BBLOB60P1;
+#define	BBLOB_DATA(p)	((u_int8_t *)((BKEYDATA *)p)->data)
+
 /* Get a BOVERFLOW item for a specific index. */
 #define	GET_BOVERFLOW(dbp, pg, indx)					\
 	((BOVERFLOW *)P_ENTRY(dbp, pg, indx))
@@ -759,13 +903,26 @@ typedef struct _boverflow {
 #define	BOVERFLOW_PSIZE							\
 	(BOVERFLOW_SIZE + sizeof(db_indx_t))
 
+/*
+ * Page space required to add a new BBLOB item to the page, with and
+ * without the index value.  BBLOB_DSIZE is used so that a B_BLOB item
+ * can be treated just like a B_KEYDATA for the purposes of moving items
+ * between or on a page, such as when doing compaction.
+ */
+#define	BBLOB_SIZE							\
+	((u_int16_t)DB_ALIGN(sizeof(BBLOB), sizeof(u_int32_t)))
+#define	BBLOB_DSIZE							\
+	(BBLOB_SIZE - SSZA(BKEYDATA, data))
+#define	BBLOB_PSIZE							\
+	(BBLOB_SIZE + sizeof(db_indx_t))
+
 #define	BITEM_SIZE(bk)							\
-	(B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_SIZE :		\
-	BKEYDATA_SIZE((bk)->len))
+	(B_TYPE((bk)->type) == B_KEYDATA ? BKEYDATA_SIZE((bk)->len) :	\
+	(B_TYPE((bk)->type) == B_BLOB ? BBLOB_SIZE : BOVERFLOW_SIZE))
 
 #define	BITEM_PSIZE(bk)							\
-	(B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_PSIZE :		\
-	BKEYDATA_PSIZE((bk)->len))
+	(B_TYPE((bk)->type) == B_KEYDATA ? BKEYDATA_PSIZE((bk)->len) :	\
+	(B_TYPE((bk)->type) == B_BLOB ? BBLOB_PSIZE : BOVERFLOW_PSIZE))
 
 /*
  * Btree leaf and hash page layouts group indices in sets of two, one for the
diff --git a/src/dbinc/db_swap.h b/src/dbinc/db_swap.h
index 352ae227..06f4eb47 100644
--- a/src/dbinc/db_swap.h
+++ b/src/dbinc/db_swap.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
@@ -51,15 +51,26 @@ extern "C" {
 #define	M_64_SWAP(a) {							\
 	u_int64_t _tmp;							\
 	_tmp = (u_int64_t)a;						\
-	((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[7];			\
-	((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[6];			\
-	((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[5];			\
-	((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[4];			\
-	((u_int8_t *)&a)[4] = ((u_int8_t *)&_tmp)[3];			\
-	((u_int8_t *)&a)[5] = ((u_int8_t *)&_tmp)[2];			\
-	((u_int8_t *)&a)[6] = ((u_int8_t *)&_tmp)[1];			\
-	((u_int8_t *)&a)[7] = ((u_int8_t *)&_tmp)[0];			\
+	((u_int8_t *)&(a))[0] = ((u_int8_t *)&_tmp)[7];			\
+	((u_int8_t *)&(a))[1] = ((u_int8_t *)&_tmp)[6];			\
+	((u_int8_t *)&(a))[2] = ((u_int8_t *)&_tmp)[5];			\
+	((u_int8_t *)&(a))[3] = ((u_int8_t *)&_tmp)[4];			\
+	((u_int8_t *)&(a))[4] = ((u_int8_t *)&_tmp)[3];			\
+	((u_int8_t *)&(a))[5] = ((u_int8_t *)&_tmp)[2];			\
+	((u_int8_t *)&(a))[6] = ((u_int8_t *)&_tmp)[1];			\
+	((u_int8_t *)&(a))[7] = ((u_int8_t *)&_tmp)[0];			\
 }
+#undef	P_64_COPYSWAP
+#define	P_64_COPYSWAP(a, b) do {					\
+	((u_int8_t *)(b))[0] = ((u_int8_t *)(a))[7];			\
+	((u_int8_t *)(b))[1] = ((u_int8_t *)(a))[6];			\
+	((u_int8_t *)(b))[2] = ((u_int8_t *)(a))[5];			\
+	((u_int8_t *)(b))[3] = ((u_int8_t *)(a))[4];			\
+	((u_int8_t *)(b))[4] = ((u_int8_t *)(a))[3];			\
+	((u_int8_t *)(b))[5] = ((u_int8_t *)(a))[2];			\
+	((u_int8_t *)(b))[6] = ((u_int8_t *)(a))[1];			\
+	((u_int8_t *)(b))[7] = ((u_int8_t *)(a))[0];			\
+} while (0)
 #undef	P_64_COPY
 #define	P_64_COPY(a, b) {						\
 	((u_int8_t *)b)[0] = ((u_int8_t *)a)[0];			\
@@ -113,7 +124,7 @@ extern "C" {
 	P_32_COPYSWAP(&_tmp, a);					\
 } while (0)
 #undef	M_32_SWAP
-#define	M_32_SWAP(a) P_32_SWAP(&a)
+#define	M_32_SWAP(a) P_32_SWAP(&(a))
 
 /*
  * Little endian <==> big endian 16-bit swap macros.
@@ -139,8 +150,13 @@ extern "C" {
 	P_16_COPYSWAP(&_tmp, a);					\
 } while (0)
 #undef	M_16_SWAP
-#define	M_16_SWAP(a) P_16_SWAP(&a)
+#define	M_16_SWAP(a) P_16_SWAP(&(a))
 
+#undef	SWAP64
+#define	SWAP64(p) {							\
+	P_64_SWAP(p);							\
+	(p) += sizeof(u_int64_t);					\
+}
 #undef	SWAP32
 #define	SWAP32(p) {							\
 	P_32_SWAP(p);							\
@@ -168,6 +184,25 @@ extern "C" {
 		P_32_SWAP(p);						\
 } while (0)
 
+#undef	DB_NTOHLL_COPYIN
+#define	DB_NTOHLL_COPYIN(env, i, p) do {				\
+	u_int8_t *tmp;							\
+	tmp = (u_int8_t *)&(i);						\
+	if (F_ISSET(env, ENV_LITTLEENDIAN)) {				\
+		tmp[7] = *p++;						\
+		tmp[6] = *p++;						\
+		tmp[5] = *p++;						\
+		tmp[4] = *p++;						\
+		tmp[3] = *p++;						\
+		tmp[2] = *p++;						\
+		tmp[1] = *p++;						\
+		tmp[0] = *p++;						\
+	} else {							\
+		memcpy(&(i), p, sizeof(u_int64_t));			\
+		p = (u_int8_t *)p + sizeof(u_int64_t);			\
+	}								\
+} while (0)
+
 #undef	DB_NTOHL_COPYIN
 #define	DB_NTOHL_COPYIN(env, i, p) do {					\
 	u_int8_t *tmp;							\
@@ -178,7 +213,7 @@ extern "C" {
 		tmp[1] = *p++;						\
 		tmp[0] = *p++;						\
 	} else {							\
-		memcpy(&i, p, sizeof(u_int32_t));			\
+		memcpy(&(i), p, sizeof(u_int32_t));			\
 		p = (u_int8_t *)p + sizeof(u_int32_t);			\
 	}								\
 } while (0)
@@ -191,11 +226,29 @@ extern "C" {
 		tmp[1] = *p++;						\
 		tmp[0] = *p++;						\
 	} else {							\
-		memcpy(&i, p, sizeof(u_int16_t));			\
+		memcpy(&(i), p, sizeof(u_int16_t));			\
 		p = (u_int8_t *)p + sizeof(u_int16_t);			\
 	}								\
 } while (0)
 
+#undef	DB_HTONLL_COPYOUT
+#define	DB_HTONLL_COPYOUT(env, p, i) do {				\
+	u_int8_t *tmp;							\
+	tmp = (u_int8_t *)p;						\
+	if (F_ISSET(env, ENV_LITTLEENDIAN)) {				\
+		*tmp++ = ((u_int8_t *)&(i))[7];				\
+		*tmp++ = ((u_int8_t *)&(i))[6];				\
+		*tmp++ = ((u_int8_t *)&(i))[5];				\
+		*tmp++ = ((u_int8_t *)&(i))[4];				\
+		*tmp++ = ((u_int8_t *)&(i))[3];				\
+		*tmp++ = ((u_int8_t *)&(i))[2];				\
+		*tmp++ = ((u_int8_t *)&(i))[1];				\
+		*tmp++ = ((u_int8_t *)&(i))[0];				\
+	} else								\
+		memcpy(p, &(i), sizeof(u_int64_t));			\
+	p = (u_int8_t *)p + sizeof(u_int64_t);				\
+} while (0)
+
 #undef	DB_HTONL_COPYOUT
 #define	DB_HTONL_COPYOUT(env, p, i) do {				\
 	u_int8_t *tmp;							\
@@ -206,7 +259,7 @@ extern "C" {
 		*tmp++ = ((u_int8_t *)&(i))[1];				\
 		*tmp++ = ((u_int8_t *)&(i))[0];				\
 	} else								\
-		memcpy(p, &i, sizeof(u_int32_t));			\
+		memcpy(p, &(i), sizeof(u_int32_t));			\
 	p = (u_int8_t *)p + sizeof(u_int32_t);				\
 } while (0)
 
@@ -229,6 +282,13 @@ extern "C" {
  */
 #define	LOG_SWAPPED(env) !F_ISSET(env, ENV_LITTLEENDIAN)
 
+#define	LOGCOPY_64(env, x, p) do {					\
+	if (LOG_SWAPPED(env))						\
+		P_64_COPYSWAP((p), (x));				\
+	else								\
+		memcpy((x), (p), sizeof(u_int64_t));			\
+} while (0)
+
 #define	LOGCOPY_32(env, x, p) do {					\
 	if (LOG_SWAPPED(env))						\
 		P_32_COPYSWAP((p), (x));				\
diff --git a/src/dbinc/db_upgrade.h b/src/dbinc/db_upgrade.h
index 45fb624d..716594c9 100644
--- a/src/dbinc/db_upgrade.h
+++ b/src/dbinc/db_upgrade.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -242,6 +242,123 @@ typedef struct hashhdr {	/* Disk resident portion */
 	 */
 } HASHHDR;
 
+
+/************************************************************************
+ BLOB RECORD LAYOUTS
+ ************************************************************************/
+
+/*
+ * Hash BLOB record layout.
+ */
+typedef struct _hblob60 {
+	u_int8_t  type;			/*    00: Page type and delete flag. */
+	u_int8_t  encoding;		/*    01: Encoding of blob file. */
+	u_int8_t  unused[2];		/* 02-03: Padding, unused. */
+	u_int32_t id_lo;		/* 04-07: Blob file identifier. */
+	u_int32_t id_hi;		/* 08-11: Blob file identifier. */
+	u_int32_t size_lo;		/* 12-15: Blob file size. */
+	u_int32_t size_hi;		/* 16-19: Blob file size. */
+	DB_LSN    lsn;			/* 20-27: LSN for blob file update. */
+	u_int8_t  chksum[DB_MAC_KEY];	/* 28-47: Checksum */
+	u_int8_t  iv[DB_IV_BYTES];	/* 48-63: IV */
+	u_int32_t file_id_lo;		/* 64-67: File directory lo. */
+	u_int32_t file_id_hi;		/* 68-71: File directory hi. */
+	u_int32_t sdb_id_lo;		/* 72-75: Subdb that owns this blob. */
+	u_int32_t sdb_id_hi;		/* 76-79: Subdb that owns this blob. */
+} HBLOB60;
+
+#define	HBLOB60_SIZE		(sizeof(HBLOB60))
+
+/*
+ * Btree BLOB record layout.
+ */
+typedef struct _bblob60 {
+	db_indx_t len;			/* 00-01: BBLOB_DSIZE. */
+	u_int8_t  type;			/*    02: Page type and delete flag. */
+	u_int8_t  encoding;		/*    03: Encoding of blob file. */
+	u_int32_t id_lo;		/* 04-07: Blob file identifier. */
+	u_int32_t id_hi;		/* 08-11: Blob file identifier. */
+	u_int32_t size_lo;		/* 12-15: Blob file size. */
+	u_int32_t size_hi;		/* 16-19: Blob file size. */
+	DB_LSN    lsn;			/* 20-27: LSN for blob file update. */
+	u_int8_t  chksum[DB_MAC_KEY];	/* 28-47: Checksum */
+	u_int8_t  iv[DB_IV_BYTES];	/* 48-63: IV */
+	u_int32_t file_id_lo;		/* 64-67: File directory lo. */
+	u_int32_t file_id_hi;		/* 68-71: File directory hi. */
+	u_int32_t sdb_id_lo;		/* 72-75: Subdb that owns this blob. */
+	u_int32_t sdb_id_hi;		/* 76-79: Subdb that owns this blob. */
+} BBLOB60;
+
+#define	BBLOB60_SIZE							\
+	((u_int16_t)DB_ALIGN(sizeof(BBLOB60), sizeof(u_int32_t)))
+/*
+ * Heap BLOB record layout.
+ */
+typedef struct _heapblob60 {
+	u_int8_t flags;			/* 00: Flags describing record. */
+	u_int8_t unused;		/* 01: Padding. */
+	u_int16_t size;			/* 02-03: The size of the stored data piece. */
+	u_int8_t  encoding;		/*    04: Encoding of blob file. */
+	u_int8_t  unused2[3];		/* 05-07: Padding, unused. */
+	u_int32_t id_lo;		/* 08-11: Blob file identifier. */
+	u_int32_t id_hi;		/* 12-15: Blob file identifier. */
+	u_int32_t size_lo;		/* 16-19: Blob file size. */
+	u_int32_t size_hi;		/* 20-23: Blob file size. */
+	u_int8_t  unused3[4];		/* 24-27: Padding, unused. */
+	u_int8_t  chksum[DB_MAC_KEY];	/* 28-47: Checksum */
+	u_int8_t  iv[DB_IV_BYTES];	/* 48-63: IV */
+	DB_LSN    lsn;			/* 64-71: LSN for blob file update. */
+	u_int32_t file_id_lo;		/* 72-75: File directory lo. */
+	u_int32_t file_id_hi;		/* 76-79: File directory hi. */
+} HEAPBLOBHDR60;
+
+#define HEAPBLOBREC60_SIZE		(sizeof(HEAPBLOBHDR60))
+
+#define GET_BLOB60_FILE_ID(e, p, o, ret)				\
+	GET_LO_HI(e, (p)->file_id_lo, (p)->file_id_hi, o, ret)
+
+#define GET_BLOB60_SDB_ID(e, p, o, ret)					\
+	GET_LO_HI(e, (p)->sdb_id_lo, (p)->sdb_id_hi, o, ret)
+
+/* Return a uintmax_t version of blob_id; sets ret to EINVAL on overflow. */
+#define GET_BLOB60_ID(e, p, o, ret)	do {				\
+	DB_ASSERT((e), sizeof(o) <= 8);					\
+	if (sizeof(o) == 8) {						\
+		(o) = (p).id_hi;					\
+		(o) = (o) << 32;					\
+		(o) += (p).id_lo;					\
+	} else {							\
+		if ((p).id_hi > 0) {					\
+			__db_errx((e), DB_STR("0766",			\
+			    "Blob identifier overflow."));		\
+			(ret) = EINVAL;					\
+		}							\
+		(o) = (p).id_lo;					\
+	}								\
+} while (0)
+
+/* Return an off_t version of blob size; sets ret to EINVAL on overflow. */
+#define GET_BLOB60_SIZE(e, p, o, ret)	do {				\
+	DB_ASSERT((e), sizeof(o) <= 8);					\
+	if (sizeof(o) == 8) {						\
+		(o) = (p).size_hi;					\
+		(o) = (o) << 32;					\
+		(o) += (p).size_lo;					\
+	} else {							\
+		if ((p).size_hi > 0) {					\
+			__db_errx((e), DB_STR("0767",			\
+			    "Blob size overflow."));			\
+			(ret) = EINVAL;					\
+		}							\
+		if ((p).size_lo > INT_MAX) {				\
+			__db_errx((e), DB_STR("0768",			\
+			    "Blob size overflow."));			\
+			(ret) = EINVAL;					\
+		}							\
+		(o) = (int32_t)(p).size_lo;				\
+	}								\
+} while (0)
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/src/dbinc/db_verify.h b/src/dbinc/db_verify.h
index 68acbf6c..ea87680f 100644
--- a/src/dbinc/db_verify.h
+++ b/src/dbinc/db_verify.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -120,9 +120,10 @@ struct __vrfy_dbinfo {
 #define	SALVAGE_PRINTABLE	0x01	/* Output printable chars literally. */
 #define	SALVAGE_PRINTHEADER	0x02	/* Print the unknown-key header. */
 #define	SALVAGE_PRINTFOOTER	0x04	/* Print the unknown-key footer. */
-#define	SALVAGE_HASSUBDBS	0x08	/* There are subdatabases to salvage. */
-#define	VRFY_LEAFCHAIN_BROKEN	0x10	/* Lost one or more Btree leaf pgs. */
-#define	VRFY_QMETA_SET		0x20    /* We've seen a QUEUE meta page and
+#define	SALVAGE_STREAM_BLOB	0x08	/* Currently streaming a blob. */
+#define	SALVAGE_HASSUBDBS	0x10	/* There are subdatabases to salvage. */
+#define	SALVAGE_LEAFCHAIN_BROKEN 0x20	/* Lost one or more Btree leaf pgs. */
+#define	SALVAGE_QMETA_SET	0x40    /* We've seen a QUEUE meta page and
 					   set things up for it. */
 	u_int32_t	flags;
 }; /* VRFY_DBINFO */
diff --git a/src/dbinc/debug.h b/src/dbinc/debug.h
index a8da000d..5388b791 100644
--- a/src/dbinc/debug.h
+++ b/src/dbinc/debug.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -36,7 +36,13 @@ extern "C" {
 #define	DB_ASSERT(env, e)						\
 	((e) ? (void)0 : __db_assert(env, #e, __FILE__, __LINE__))
 #else
-#define	DB_ASSERT(env, e)	NOP_STATEMENT
+#define	DB_ASSERT(env, e)	((void)0)
+#endif
+
+#if defined(HAVE_ERROR_HISTORY)
+#define DB_DEBUG_MSG	__db_debug_msg
+#else	/* Never call it, but stay one expression so a later else is safe. */
+#define DB_DEBUG_MSG	1 ? (void)0 : (void)__db_debug_msg
 #endif
 
 /*
@@ -55,10 +61,11 @@ extern "C" {
  * of structure fields whose only purpose is padding, as well as when heap
  * memory that was never initialized is written to disk.
  */
+#define	UMRW_SET(var)			UMRW_SET_VALUE((var), 0)
 #ifdef	UMRW
-#define	UMRW_SET(v)	(v) = 0
+#define	UMRW_SET_VALUE(var, value)	(var) = (value)
 #else
-#define	UMRW_SET(v)	NOP_STATEMENT
+#define	UMRW_SET_VALUE(var, value)	NOP_STATEMENT
 #endif
 
 /*
@@ -73,6 +80,34 @@ typedef enum {
 } db_error_set_t;
 
 /*
+ * Use these macros wherever an error condition is initially noticed, e.g., when
+ * setting a value to any of the user visible error return codes, whether
+ * defined by Berkeley DB or by the operating environment (EINVAL). They save
+ * the specific source of an instance of an error code, including the
+ * time, stack, db name, current LSN, etc. If the error turns out to be
+ * important, the deferred message text is added to the text produced by
+ * __db_err(), __db_errx(), and __db_syserr(). The additional information can be
+ * useful for diagnosing the behavior of applications under error conditions.
+ * It is enabled by configuring with --enable-error_history. The current
+ * implementation requires pthreads' version of thread local storage.
+ */
+#ifdef HAVE_ERROR_HISTORY
+#define USR_ERR(env, errcode)		__db_diags((env), (errcode))
+#define DBC_ERR(dbc, errcode)		__dbc_diags((dbc), (errcode))
+#define MUTEX_ERR(env, mutex, errcode)	__mutex_diags((env), (mutex), (errcode))
+#define DISCARD_HISTORY(env)		__db_deferred_discard()
+/* Save at most 10KB of error history in an API call. Adjust this as desired. */
+#define DB_ERROR_HISTORY_SIZE		(10 * 1024)
+#else
+#define USR_ERR(env, errcode)		(errcode)
+#define DBC_ERR(dbc, errcode)		(errcode)
+#define MUTEX_ERR(env, mutex, errcode)	(errcode)
+#define DISCARD_HISTORY(env)		NOP_STATEMENT
+/* No space is needed when error history is disabled. */
+#define DB_ERROR_HISTORY_SIZE		0
+#endif
+
+/*
  * Message handling.  Use a macro instead of a function because va_list
  * references to variadic arguments cannot be reset to the beginning of the
  * variadic argument list (and then rescanned), by functions other than the
@@ -102,6 +137,7 @@ typedef enum {
 	    ((app_call) || F_ISSET((dbenv)->env, ENV_NO_OUTPUT_SET))))	\
 		__db_errfile(dbenv, error, error_set, fmt, __ap);	\
 	va_end(__ap);							\
+	DISCARD_HISTORY((dbenv)->env);						\
 }
 #else
 #define	DB_REAL_ERR(dbenv, error, error_set, app_call, fmt) {		\
@@ -127,6 +163,7 @@ typedef enum {
 	    ((app_call) || F_ISSET((dbenv)->env, ENV_NO_OUTPUT_SET))))	\
 		 __db_errfile(env, error, error_set, fmt, __ap);	\
 	va_end(__ap);							\
+	DISCARD_HISTORY(env);						\
 }
 #endif
 #if defined(STDC_HEADERS) || defined(__cplusplus)
@@ -192,7 +229,7 @@ typedef enum {
 #define	LOG_OP(C, T, O, K, A, F) {					\
 	DB_LSN __lsn;							\
 	DBT __op;							\
-	if (DBC_LOGGING((C))) {						\
+	if ((C)->dbp->log_filename != NULL && DBC_LOGGING((C))) {	\
 		memset(&__op, 0, sizeof(__op));				\
 		__op.data = O;						\
 		__op.size = (u_int32_t)strlen(O) + 1;			\
diff --git a/src/dbinc/fop.h b/src/dbinc/fop.h
index 94f27f9f..7ea62023 100644
--- a/src/dbinc/fop.h
+++ b/src/dbinc/fop.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -23,6 +23,20 @@ extern "C" {
 	(void)__memp_set_flags((D)->mpf, DB_MPOOL_NOFILE, 0);	\
 } while (0)
 
+/*
+ * Never change the values of DB_FOP_CREATE (0x00000002),
+ * DB_FOP_APPEND (0x00000001), and DB_FOP_REDO (0x00000008),
+ * as those values are used in write_file logs.
+ */
+#define	DB_FOP_APPEND		0x00000001 /* Appending to a file. */
+#define	DB_FOP_CREATE		0x00000002 /* Creating the file. */
+#define	DB_FOP_PARTIAL_LOG	0x00000004 /* Partial logging of file data. */
+#define	DB_FOP_REDO		0x00000008 /* File operation can be redone. */
+#define	DB_FOP_READONLY		0x00000010 /* File is read only. */
+#define	DB_FOP_WRITE		0x00000020 /* File is writeable. */
+#define	DB_FOP_SYNC_WRITE	0x00000040 /* Sync file on each write. */
+
+
 #include "dbinc_auto/fileops_auto.h"
 #include "dbinc_auto/fileops_ext.h"
 
diff --git a/src/dbinc/globals.h b/src/dbinc/globals.h
index 95e5c118..becd6365 100644
--- a/src/dbinc/globals.h
+++ b/src/dbinc/globals.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -52,21 +52,27 @@ typedef struct __db_globals {
 
 	char error_buf[40];		/* Error string buffer. */
 
-	int uid_init;			/* srand set in UID generator */
+	int random_seeded;		/* Has __os_srandom been called? */
 
-	u_long rand_next;		/* rand/srand value */
+#if defined(HAVE_RANDOM_R)
+	struct random_data random_data;	/* srandom_r/random_r argument */
+	char random_state[64];		/* random number state */
+#elif !defined(HAVE_RAND) && !defined(HAVE_RANDOM)
+	u_long rand_next;		/* next rand value for clib/rand.c */
+#endif
 
 	u_int32_t fid_serial;		/* file id counter */
 
 	int db_errno;			/* Errno value if not available */
 
-	size_t num_active_pids;		/* number of entries in active_pids */
-
-	size_t size_active_pids;	/* allocated size of active_pids */
+	char *saved_errstr;		/* saved error string from backup */
 
-	pid_t *active_pids;		/* array active pids */
+	char *time_format;		/* strftime-format for printing dates */
 
-	char *saved_errstr;		/* saved error string from backup */
+#if defined(HAVE_ERROR_HISTORY) && defined(HAVE_PTHREAD_SELF)
+	pthread_key_t msgs_key;
+	pthread_once_t thread_once;
+#endif
 
 	/* Underlying OS interface jump table.*/
 	void	(*j_assert) __P((const char *, const char *, int));
diff --git a/src/dbinc/hash.h b/src/dbinc/hash.h
index f485128a..55a64f87 100644
--- a/src/dbinc/hash.h
+++ b/src/dbinc/hash.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
@@ -56,7 +56,7 @@ typedef struct hash_t {
 	u_int32_t h_nelem;	/* Number of elements. */
 				/* Hash and compare functions. */
 	u_int32_t (*h_hash) __P((DB *, const void *, u_int32_t));
-	int (*h_compare) __P((DB *, const DBT *, const DBT *));
+	int (*h_compare) __P((DB *, const DBT *, const DBT *, size_t *));
 } HASH;
 
 /* Cursor structure definitions. */
diff --git a/src/dbinc/heap.h b/src/dbinc/heap.h
index ca3407e0..bb96ebec 100644
--- a/src/dbinc/heap.h
+++ b/src/dbinc/heap.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #ifndef _DB_HEAP_H_
@@ -26,7 +26,8 @@ struct __heap {		/* Heap access method. */
 
 	db_pgno_t curregion;	/* The region of the next insert. */
 	db_pgno_t maxpgno;	/* Maximum page number of a fixed size heap. */
-	int curpgindx;	/* The last used offset in the region's space bitmap. */
+	u_int32_t curpgindx;	/* The last used offset in the
+				 * region's space bitmap. */
 };
 
 struct __heap_cursor {
diff --git a/src/dbinc/hmac.h b/src/dbinc/hmac.h
index 2a495b17..f87965eb 100644
--- a/src/dbinc/hmac.h
+++ b/src/dbinc/hmac.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/lock.h b/src/dbinc/lock.h
index eab51832..298b8527 100644
--- a/src/dbinc/lock.h
+++ b/src/dbinc/lock.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -37,7 +37,10 @@ extern "C" {
  */
 #define	LOCK_INVALID		INVALID_ROFF
 #define	LOCK_ISSET(lock)	((lock).off != LOCK_INVALID)
-#define	LOCK_INIT(lock)		((lock).off = LOCK_INVALID)
+#define	LOCK_INIT(lock)		do {			\
+	(lock).off = LOCK_INVALID;			\
+	UMRW_SET_VALUE((lock).mode, DB_LOCK_NG);	\
+} while(0)
 
 /*
  * Macro to identify a write lock for the purpose of counting locks
@@ -66,8 +69,8 @@ extern "C" {
 typedef struct __db_lockregion { /* SHARED */
 	db_mutex_t	mtx_region;	/* Region mutex. */
 
-	u_int32_t	need_dd;	/* flag for deadlock detector */
-	u_int32_t	detect;		/* run dd on every conflict */
+	u_int32_t	need_dd;	/* run dd on every conflict */
+	u_int32_t	detect;		/* flag for deadlock detector */
 	db_timespec	next_timeout;	/* next time to expire a lock */
 	db_mutex_t	mtx_dd;		/* mutex for lock object dd list. */
 	db_mutex_t	mtx_lockers;	/* mutex for locker allocation. */
@@ -92,7 +95,7 @@ typedef struct __db_lockregion { /* SHARED */
 
 	u_int32_t	lock_id;	/* Current lock(er) id to allocate. */
 	u_int32_t	cur_maxid;	/* Current max lock(er) id. */
-	u_int32_t	nlockers;	/* Current number of lockers. */
+	u_int32_t	nlockers;	/* Current number of locker ids. */
 	int32_t		nmodes;		/* Number of modes in conflict table. */
 	DB_LOCK_STAT	stat;		/* stats about locking. */
 } DB_LOCKREGION;
@@ -157,12 +160,16 @@ struct __db_locker { /* SHARED */
 	db_timespec	lk_expire;	/* When current lock expires. */
 	db_timespec	tx_expire;	/* When this txn expires. */
 	db_timeout_t	lk_timeout;	/* How long do we let locks live. */
+#ifdef DIAGNOSTIC
+	roff_t		prev_locker;	/* The thread's previous dbth_locker. */
+#endif
 
 #define	DB_LOCKER_DIRTY		0x0001	/* Has write locks. */
 #define	DB_LOCKER_INABORT	0x0002	/* Is aborting, don't abort again. */
 #define	DB_LOCKER_TIMEOUT	0x0004	/* Has timeout set. */
 #define	DB_LOCKER_FAMILY_LOCKER 0x0008	/* Part of a family of lockers. */
 #define	DB_LOCKER_HANDLE_LOCKER 0x0010	/* Not associated with a thread. */
+#define	DB_LOCKER_FREE 		0x0020	/* Diag: it is on the free list. */
 	u_int32_t flags;
 };
 
diff --git a/src/dbinc/log.h b/src/dbinc/log.h
index c4dea6fc..2e2929f0 100644
--- a/src/dbinc/log.h
+++ b/src/dbinc/log.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -55,6 +55,8 @@ struct __fname {
 			/* number of txn referencing + 1 for the db handle. */
 	u_int32_t txn_ref;
 
+	db_seq_t blob_file_id;		/* BLOB file directory id. */
+
 #define	DB_FNAME_CLOSED		0x01	/* DBP was closed. */
 #define	DB_FNAME_DURABLE	0x02	/* File is durable. */
 #define	DB_FNAME_INMEM		0x04	/* File is in memory. */
@@ -137,16 +139,18 @@ struct __db_log {
 	ENV	 *env;			/* Environment */
 	REGINFO	  reginfo;		/* Region information. */
 
-#define	DBLOG_AUTOREMOVE	0x01	/* Autoremove log files. */
-#define	DBLOG_DIRECT		0x02	/* Do direct I/O on the log. */
-#define	DBLOG_DSYNC		0x04	/* Set OS_DSYNC on the log. */
-#define	DBLOG_FORCE_OPEN	0x08	/* Force the DB open even if it appears
+#define	DBLOG_AUTOREMOVE	0x001	/* Autoremove log files. */
+#define	DBLOG_BLOB		0x002	/* Full logging of blob data. */
+#define	DBLOG_DIRECT		0x004	/* Do direct I/O on the log. */
+#define	DBLOG_DSYNC		0x008	/* Set OS_DSYNC on the log. */
+#define	DBLOG_FORCE_OPEN	0x010	/* Force the DB open even if it appears
 					 * to be deleted. */
-#define	DBLOG_INMEMORY		0x10	/* Logging is in memory. */
-#define	DBLOG_OPENFILES		0x20	/* Prepared files need to be open. */
-#define	DBLOG_RECOVER		0x40	/* We are in recovery. */
-#define	DBLOG_ZERO		0x80	/* Zero fill the log. */
-#define	DBLOG_VERIFYING		0x100	/* The log is being verified. */
+#define	DBLOG_INMEMORY		0x020	/* Logging is in memory. */
+#define	DBLOG_NOSYNC		0x040	/* Don't sync log files during flush. */
+#define	DBLOG_OPENFILES		0x080	/* Prepared files need to be open. */
+#define	DBLOG_RECOVER		0x100	/* We are in recovery. */
+#define	DBLOG_ZERO		0x200	/* Zero fill the log. */
+#define	DBLOG_VERIFYING		0x400	/* The log is being verified. */
 	u_int32_t flags;
 };
 
@@ -251,7 +255,8 @@ struct __log { /* SHARED */
 	 * rather than by the region mutex.
 	 */
 	db_mutex_t mtx_flush;		/* Mutex guarding flushing. */
-	int32_t	   in_flush;	/* Log flush in progress. */
+	int32_t	   in_flush;		/* Log flush in progress. */
+	int32_t	   nosync;		/* log_set_config(DB_LOG_NOSYNC) */
 	DB_LSN	   s_lsn;		/* LSN of the last sync. */
 
 	DB_LOG_STAT stat;		/* Log statistics. */
diff --git a/src/dbinc/log_verify.h b/src/dbinc/log_verify.h
index fa90ace4..ec43c4d7 100644
--- a/src/dbinc/log_verify.h
+++ b/src/dbinc/log_verify.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/mp.h b/src/dbinc/mp.h
index 9a10c6d9..598ca366 100644
--- a/src/dbinc/mp.h
+++ b/src/dbinc/mp.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -226,10 +226,15 @@ struct __mpool { /* SHARED */
 #define	DB_MEMP_SYNC_INTERRUPT	0x02
 	u_int32_t config_flags;
 
-	/* Free frozen buffer headers, protected by the region lock. */
+	/* These MVCC fields are protected by the mpool region lock. */
+
+	/* This is the free list of BH_FROZEN_PAGEs, the frozen headers. */
 	SH_TAILQ_HEAD(__free_frozen) free_frozen;
 
-	/* Allocated blocks of frozen buffer headers. */
+	/*
+	 * This list of BH_FROZEN_ALLOCs contains all the BH_FROZEN_PAGEs,
+	 * whether they are in free_frozen or busy (in a bh.vc version chain).
+	 */
 	SH_TAILQ_HEAD(__alloc_frozen) alloc_frozen;
 };
 
@@ -550,9 +555,10 @@ struct __bh { /* SHARED */
 #define	BH_FROZEN	0x040		/* Frozen buffer: allocate & re-read. */
 #define	BH_TRASH	0x080		/* Page is garbage. */
 #define	BH_THAWED	0x100		/* Page was thawed. */
+#define	BH_UNREACHABLE	0x200		/* Discard this defunct MVCC version. */
 	u_int16_t	flags;
 
-	u_int32_t	priority;	/* Priority. */
+	u_int32_t	priority;	/* Cache priority. */
 	SH_TAILQ_ENTRY	hq;		/* MPOOL hash bucket queue. */
 
 	db_pgno_t	pgno;		/* Underlying MPOOLFILE page number. */
@@ -587,9 +593,12 @@ struct __bh_frozen_p {
 
 /*
  * BH_FROZEN_ALLOC --
- *	Frozen buffer headers are allocated a page at a time in general.  This
- *	structure is allocated at the beginning of the page so that the
- *	allocation chunks can be tracked and freed (for private environments).
+ *	This structure is the container for one or more frozen buffer headers.
+ *	Blocks of BH_FROZEN_PAGE structs are usually allocated a page at a time,
+ *	though when an mpool is nearly full and a whole page isn't available
+ *	there can be single-item blocks.  BH_FROZEN_ALLOC is the block header
+ *	allocated at the beginning of the chunk and is linked to the mpool's
+ *	alloc_frozen so that the allocation chunks can be tracked and freed.
  */
 struct __bh_frozen_a {
 	SH_TAILQ_ENTRY links;
@@ -602,33 +611,36 @@ struct __bh_frozen_a {
     (F_ISSET(PAGE_TO_BH(p), BH_DIRTY|BH_EXCLUSIVE) == (BH_DIRTY|BH_EXCLUSIVE))
 
 #define	BH_OWNER(env, bhp)						\
-    ((TXN_DETAIL *)R_ADDR(&env->tx_handle->reginfo, bhp->td_off))
+    ((TXN_DETAIL *)R_ADDR(&(env)->tx_handle->reginfo, (bhp)->td_off))
 
 #define	BH_OWNED_BY(env, bhp, txn)	((txn) != NULL &&		\
-    (bhp)->td_off != INVALID_ROFF &&					\
-    (txn)->td == BH_OWNER(env, bhp))
+    (bhp)->td_off != INVALID_ROFF && (txn)->td == BH_OWNER(env, bhp))
 
-#define	VISIBLE_LSN(env, bhp)						\
-    (&BH_OWNER(env, bhp)->visible_lsn)
+#define	VISIBLE_LSN(env, bhp)	(&BH_OWNER(env, bhp)->visible_lsn)
 
 /*
- * Make a copy of the buffer's visible LSN, one field at a time.  We rely on the
- * 32-bit operations being atomic.  The visible_lsn starts at MAX_LSN and is
- * set during commit or abort to the current LSN.
+ * MVCC Versions are visible only to snapshot transactions whose read_lsn is at
+ * least as recent (large) as the buffer's lsn. Visibility checks must be made
+ * from newest to oldest along bhp.vc, stopping at the first visible one.
+ * Unversioned buffers (those with invalid td_off) are always visible.
+ *
+ * BH_VISIBLE() makes a copy of the buffer's visible LSN, one field at a time.
+ * We rely on the 32-bit operations being atomic.  The visible_lsn starts at
+ * MAX_LSN and is set during commit or abort to the current LSN.
  *
- * If we race with a commit / abort, we may see either the file or the offset
+ * If we race with a commit or abort, we may see either the file or the offset
  * still at UINT32_MAX, so vlsn is guaranteed to be in the future.  That's OK,
  * since we had to take the log region lock to allocate the read LSN so we were
  * never going to see this buffer anyway.
  */
 #define	BH_VISIBLE(env, bhp, read_lsnp, vlsn)				\
     (bhp->td_off == INVALID_ROFF ||					\
-    ((vlsn).file = VISIBLE_LSN(env, bhp)->file,			\
+    ((vlsn).file = VISIBLE_LSN(env, bhp)->file,				\
     (vlsn).offset = VISIBLE_LSN(env, bhp)->offset,			\
     LOG_COMPARE((read_lsnp), &(vlsn)) >= 0))
 
 #define	BH_OBSOLETE(bhp, old_lsn, vlsn)	(SH_CHAIN_HASNEXT(bhp, vc) ?	\
-    BH_VISIBLE(env, SH_CHAIN_NEXTP(bhp, vc, __bh), &(old_lsn), vlsn) :\
+    BH_VISIBLE(env, SH_CHAIN_NEXTP(bhp, vc, __bh), &(old_lsn), vlsn) :	\
     BH_VISIBLE(env, bhp, &(old_lsn), vlsn))
 
 #define	MVCC_SKIP_CURADJ(dbc, pgno) (dbc->txn != NULL &&		\
diff --git a/src/dbinc/mutex.h b/src/dbinc/mutex.h
index b699142c..334d8f96 100644
--- a/src/dbinc/mutex.h
+++ b/src/dbinc/mutex.h
@@ -1,7 +1,7 @@
 /*
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -24,10 +24,14 @@ extern "C" {
 #endif
 
 /*
- * By default, spin 50 times per processor if fail to acquire a test-and-set
- * mutex, we have anecdotal evidence it's a reasonable value.
+ * These specify the default spin parameters for test-and-set mutexes. A single
+ * processor system spins just once, a multiprocessor system spins 50 times per
+ * processor up to a default maximum of 200. This limit reduces excessive
+ * busy-waiting on machines with many hyperthreads. We have anecdotal evidence
+ * that these are reasonable default values.
  */
 #define	MUTEX_SPINS_PER_PROCESSOR	50
+#define	MUTEX_SPINS_DEFAULT_MAX		200
 
 /*
  * Mutexes are represented by unsigned, 32-bit integral values.  As the
@@ -163,13 +167,6 @@ static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex)
 #define	__mutex_rdlock(a, b)		__db_win32_mutex_readlock(a, b)
 #define	__mutex_tryrdlock(a, b)		__db_win32_mutex_tryreadlock(a, b)
 #endif
-#elif defined(HAVE_MUTEX_FCNTL)
-#define	__mutex_init(a, b, c)		__db_fcntl_mutex_init(a, b, c)
-#define	__mutex_lock(a, b)		__db_fcntl_mutex_lock(a, b, 0)
-#define	__mutex_timedlock(a, b, c)	__db_fcntl_lock(a, b, c)
-#define	__mutex_trylock(a, b)		__db_fcntl_mutex_trylock(a, b)
-#define	__mutex_unlock(a, b)		__db_fcntl_mutex_unlock(a, b)
-#define	__mutex_destroy(a, b)		__db_fcntl_mutex_destroy(a, b)
 #else
 #define	__mutex_init(a, b, c)		__db_tas_mutex_init(a, b, c)
 #define	__mutex_lock(a, b)		__db_tas_mutex_lock(a, b, 0)
@@ -184,9 +181,8 @@ static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex)
 #endif
 
 /*
- * When there is no method to get a shared latch, fall back to
- * implementing __mutex_rdlock() as getting an exclusive one.
- * This occurs either when !HAVE_SHARED_LATCHES or HAVE_MUTEX_FCNTL.
+ * When there is no method to get a shared latch, fall back to implementing
+ * __mutex_rdlock() as an exclusive one. This may no longer be supported?
  */
 #ifndef __mutex_rdlock
 #define	__mutex_rdlock(a, b)		__mutex_lock(a, b)
@@ -199,17 +195,25 @@ static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex)
  * Lock/unlock a mutex.  If the mutex was never required, the thread of
  * control can proceed without it.
  *
- * We never fail to acquire or release a mutex without panicing.  Simplify
+ * We rarely fail to acquire or release a mutex without panicking.  Simplify
  * the macros to always return a panic value rather than saving the actual
- * return value of the mutex routine.
+ * return value of the mutex routine. Use MUTEX_LOCK_RET() when the caller has
+ * a code path for a mutex failure, e.g., when cleaning up after a panic.
  */
 #ifdef HAVE_MUTEX_SUPPORT
 #define	MUTEX_LOCK(env, mutex) do {					\
-	if ((mutex) != MUTEX_INVALID &&					\
-	    __mutex_lock(env, mutex) != 0)				\
+	if ((mutex) != MUTEX_INVALID &&	__mutex_lock(env, mutex) != 0)	\
 		return (DB_RUNRECOVERY);				\
 } while (0)
 
+#define MUTEX_LOCK_RET(env, mutex) 					\
+	((mutex) == MUTEX_INVALID ? 0 : __mutex_lock(env, mutex))
+
+/*
+ * Always check the return value of MUTEX_TRYLOCK()!  Expect 0 on success,
+ * or possibly DB_RUNRECOVERY for failchk.
+ */
+
 /*
  * Always check the return value of MUTEX_TRYLOCK()!  Expect 0 on success,
  * or DB_LOCK_NOTGRANTED, or possibly DB_RUNRECOVERY for failchk.
@@ -217,9 +221,7 @@ static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex)
 #define	MUTEX_TRYLOCK(env, mutex)					\
 	(((mutex) == MUTEX_INVALID) ? 0 : __mutex_trylock(env, mutex))
 
-/*
- * Acquire a DB_MUTEX_SHARED "mutex" in shared mode.
- */
+/* Acquire a latch (a DB_MUTEX_SHARED "mutex") in shared mode. */
 #define	MUTEX_READLOCK(env, mutex) do {					\
 	if ((mutex) != MUTEX_INVALID &&					\
 	    __mutex_rdlock(env, mutex) != 0)				\
@@ -234,30 +236,68 @@ static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex)
 		return (DB_RUNRECOVERY);				\
 } while (0)
 
-#define	MUTEX_WAIT(env, mutex, duration) do {			      \
-	int __ret;						      \
-	if ((mutex) != MUTEX_INVALID &&				      \
-	    (__ret = __mutex_timedlock(env, mutex, duration)) != 0 && \
-	    __ret != DB_TIMEOUT)				      \
-		return (DB_RUNRECOVERY);			      \
+#define	MUTEX_WAIT(env, mutex, duration) do {				\
+	int __ret;							\
+	if ((mutex) != MUTEX_INVALID &&					\
+	    (__ret = __mutex_timedlock(env, mutex, duration)) != 0 &&	\
+	    __ret != DB_TIMEOUT)					\
+		return (DB_RUNRECOVERY);				\
 } while (0)
+
+/*
+ * True when mutex is MUTEX_INVALID, mutexes or locking are off, or the mutex
+ * is exclusively held (DB_MUTEX_LOCKED) by someone, not necessarily this thread.
+ */
+#define	MUTEX_IS_OWNED(env, mutex)					\
+	((mutex) == MUTEX_INVALID || !MUTEX_ON(env) ||			\
+	F_ISSET((env)->dbenv, DB_ENV_NOLOCKING) ||			\
+	F_ISSET(MUTEXP_SET(env, mutex), DB_MUTEX_LOCKED))
 #else
 /*
  * There are calls to lock/unlock mutexes outside of #ifdef's -- replace
  * the call with something the compiler can discard, but which will make
- * if-then-else blocks work correctly.
+ * if-then-else blocks work correctly, and suppress unused variable messages.
+ */
+#define	MUTEX_LOCK(env, mutex)	do { env = (env); mutex = (mutex); } while (0)
+#define	MUTEX_LOCK_RET(env, mutex)	( env = (env), mutex = (mutex), 0)
+#define	MUTEX_TRYLOCK(env, mutex)	( env = (env), mutex = (mutex), 0)
+#define	MUTEX_READLOCK(env, mutex)	do { env = (env); mutex = (mutex); } while (0)
+#define	MUTEX_TRY_READLOCK(env, mutex)	( env = (env), mutex = (mutex), 0 )
+#define	MUTEX_UNLOCK(env, mutex)	do { env = (env); mutex = (mutex); } while (0)
+#define	MUTEX_REQUIRED(env, mutex)	do { env = (env); mutex = (mutex); } while (0)
+#define	MUTEX_REQUIRED_READ(env, mutex)	do { env = (env); mutex = (mutex); } while (0)
+#define	MUTEX_WAIT(env, mutex, duration)	do {			\
+	(env) = (env); (mutex) = (mutex); (duration) = (duration);	\
+} while (0)
+
+/*
+ * Every MUTEX_IS_OWNED() caller expects to own it. When there is no mutex
+ * support, act as if we have ownership.
  */
-#define	MUTEX_LOCK(env, mutex)		(mutex) = (mutex)
-#define	MUTEX_TRYLOCK(env, mutex)	(mutex) = (mutex)
-#define	MUTEX_READLOCK(env, mutex)	(mutex) = (mutex)
-#define	MUTEX_TRY_READLOCK(env, mutex)	(mutex) = (mutex)
-#define	MUTEX_UNLOCK(env, mutex)	(mutex) = (mutex)
-#define	MUTEX_REQUIRED(env, mutex)	(mutex) = (mutex)
-#define	MUTEX_REQUIRED_READ(env, mutex)	(mutex) = (mutex)
-#define	MUTEX_WAIT(env, mutex, duration) (mutex) = (mutex)
+#define	MUTEX_IS_OWNED(env, mutex)	1
 #endif
 
 /*
+ * Bulk initialization of mutexes in regions.
+ */
+
+#define MUTEX_BULK_INIT(env, region, start, howmany) do {		\
+	DB_MUTEX *__mutexp;						\
+	db_mutex_t __i = (start);					\
+	u_int32_t __n = (howmany);					\
+	if (__n == 0) break;	/* Nothing to link; --__n would wrap. */ \
+	for (__mutexp = MUTEXP_SET(env, __i); --__n > 0;		\
+	    __mutexp = MUTEXP_SET(env, __i)) {				\
+		__mutexp->flags = 0;					\
+		__i = (F_ISSET(env, ENV_PRIVATE)) ?			\
+		    ((uintptr_t)__mutexp + (region)->mutex_size) : __i + 1; \
+		__mutexp->mutex_next_link = __i;			\
+	}								\
+	__mutexp->flags = 0;						\
+	__mutexp->mutex_next_link = MUTEX_INVALID;			\
+} while (0)
+
+/*
  * Berkeley DB ports may require single-threading at places in the code.
  */
 #ifdef HAVE_MUTEX_VXWORKS
diff --git a/src/dbinc/mutex_int.h b/src/dbinc/mutex_int.h
index b9bccdf7..4a4468af 100644
--- a/src/dbinc/mutex_int.h
+++ b/src/dbinc/mutex_int.h
@@ -1,7 +1,7 @@
 /*
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -73,6 +73,14 @@ extern "C" {
 	else								\
 		RET_SET((pthread_mutex_lock(&(mutexp)->u.m.mutex)), ret); \
 } while (0)
+#define	RET_SET_PTHREAD_TIMEDLOCK(mutexp, timespec, ret) do {		\
+	if (F_ISSET(mutexp, DB_MUTEX_SHARED))				\
+		RET_SET(pthread_rwlock_timedwrlock(&(mutexp)->u.rwlock, \
+		    (timespec)), ret);					\
+	else								\
+		RET_SET(pthread_mutex_timedlock(&(mutexp)->u.m.mutex,	\
+		    (timespec)), ret); 					\
+} while (0)
 #define	RET_SET_PTHREAD_TRYLOCK(mutexp, ret) do {			\
 	if (F_ISSET(mutexp, DB_MUTEX_SHARED))				\
 		RET_SET((pthread_rwlock_trywrlock(&(mutexp)->u.rwlock)), \
@@ -84,6 +92,9 @@ extern "C" {
 #else
 #define	RET_SET_PTHREAD_LOCK(mutexp, ret)				\
 		RET_SET(pthread_mutex_lock(&(mutexp)->u.m.mutex), ret);
+#define	RET_SET_PTHREAD_TIMEDLOCK(mutexp, timespec, ret)		\
+		RET_SET(pthread_mutex_timedlock(&(mutexp)->u.m.mutex,	\
+		    (timespec)), ret);
 #define	RET_SET_PTHREAD_TRYLOCK(mutexp, ret)				\
 		RET_SET(pthread_mutex_trylock(&(mutexp)->u.m.mutex), ret);
 #endif
@@ -267,6 +278,11 @@ typedef abilock_t tsl_t;
 #include <sys/machlock.h>
 typedef lock_t tsl_t;
 
+/* 
+ * Solaris requires 8 byte alignment for pthread_mutex_t values.
+ */
+#define MUTEX_ALIGN 8
+
 /*
  * The functions are declared in <sys/machlock.h>, but under #ifdef KERNEL.
  * Re-declare them here to avoid warnings.
@@ -778,6 +794,7 @@ MUTEX_SET(tsl_t *tsl) {
 static inline void
 MUTEX_UNSET(tsl_t *tsl) {
 	__asm__ volatile(
+	       "       .set mips2          \n"
 	       "       .set noreorder      \n"
 	       "       sync                \n"
 	       "       sw      $0, %0      \n"
@@ -892,15 +909,22 @@ struct __db_mutexmgr {
 	REGINFO	 reginfo;		/* Region information */
 
 	void	*mutex_array;		/* Base of the mutex array */
+#ifdef HAVE_FAILCHK_BROADCAST
+	/*
+	 * The mutex lock functions wait for at most this long between checks
+	 * for DB_MUTEX_OWNER_DEAD. This field needs no mutex protection.
+	 */
+	db_timeout_t	failchk_polltime;
+#endif
 };
 
 /* Macros to lock/unlock the mutex region as a whole. */
-#define	MUTEX_SYSTEM_LOCK(dbenv)					\
-	MUTEX_LOCK(dbenv, ((DB_MUTEXREGION *)				\
-	    (dbenv)->mutex_handle->reginfo.primary)->mtx_region)
-#define	MUTEX_SYSTEM_UNLOCK(dbenv)					\
-	MUTEX_UNLOCK(dbenv, ((DB_MUTEXREGION *)				\
-	    (dbenv)->mutex_handle->reginfo.primary)->mtx_region)
+#define	MUTEX_SYSTEM_LOCK(env)						\
+	MUTEX_LOCK(env, ((DB_MUTEXREGION *)				\
+	    (env)->mutex_handle->reginfo.primary)->mtx_region)
+#define	MUTEX_SYSTEM_UNLOCK(env)					\
+	MUTEX_UNLOCK(env, ((DB_MUTEXREGION *)				\
+	    (env)->mutex_handle->reginfo.primary)->mtx_region)
 
 /*
  * DB_MUTEXREGION --
@@ -927,6 +951,16 @@ typedef struct __db_mutexregion { /* SHARED */
 } DB_MUTEXREGION;
 
 #ifdef HAVE_MUTEX_SUPPORT
+/*
+ * MUTEX_DIAG turns on the recording of when and where a mutex was locked. It
+ * has a large impact, and should only be turned on when debugging mutexes.
+ */
+#define MUTEX_STACK_TEXT_SIZE	600
+typedef struct __mutex_history { /* SHARED */
+	db_timespec when;
+	char	stacktext[MUTEX_STACK_TEXT_SIZE];
+} MUTEX_HISTORY;
+
 struct __db_mutex_t { /* SHARED */	/* Mutex. */
 #ifdef MUTEX_FIELDS
 	MUTEX_FIELDS			/* Opaque thread mutex structures. */
@@ -959,9 +993,9 @@ struct __db_mutex_t { /* SHARED */	/* Mutex. */
 
 	db_mutex_t mutex_next_link;	/* Linked list of free mutexes. */
 
-#ifdef HAVE_STATISTICS
 	int	  alloc_id;		/* Allocation ID. */
 
+#ifdef HAVE_STATISTICS
 	u_int32_t mutex_set_wait;	/* Granted after wait. */
 	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
 #ifdef HAVE_SHARED_LATCHES
@@ -973,7 +1007,9 @@ struct __db_mutex_t { /* SHARED */	/* Mutex. */
 	u_int32_t hybrid_wakeup;	/* for counting spurious wakeups */
 #endif
 #endif
-
+#ifdef MUTEX_DIAG
+	MUTEX_HISTORY	mutex_history;
+#endif
 	/*
 	 * A subset of the flag arguments for __mutex_alloc().
 	 *
@@ -992,19 +1028,6 @@ struct __db_mutex_t { /* SHARED */	/* Mutex. */
 	    (indx) *							\
 	    ((DB_MUTEXREGION *)env->mutex_handle->reginfo.primary)->mutex_size))
 
-/*
- * Check that a particular mutex is exclusively held at least by someone, not
- * necessarily the current thread.
- */
-#ifdef HAVE_MUTEX_SUPPORT
-#define	MUTEX_IS_OWNED(env, mutex)					\
-	(mutex == MUTEX_INVALID || !MUTEX_ON(env) ||			\
-	F_ISSET(env->dbenv, DB_ENV_NOLOCKING) ||			\
-	F_ISSET(MUTEXP_SET(env, mutex), DB_MUTEX_LOCKED))
-#else
-#define	MUTEX_IS_OWNED(env, mutex)	0
-#endif
-
 #if defined(HAVE_MUTEX_HYBRID) ||  defined(DB_WIN32) ||		\
 	(defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_PTHREADS))
 #define	MUTEXP_IS_BUSY(mutexp)					\
diff --git a/src/dbinc/os.h b/src/dbinc/os.h
index 2515e6ee..ea1fd2c4 100644
--- a/src/dbinc/os.h
+++ b/src/dbinc/os.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/partition.h b/src/dbinc/partition.h
index 09e42573..11cdfa6f 100644
--- a/src/dbinc/partition.h
+++ b/src/dbinc/partition.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * $Id$
@@ -22,6 +22,7 @@ typedef struct __db_partition {
 	u_int32_t	(*callback) (DB *, DBT *);
 #define	PART_CALLBACK	0x01
 #define	PART_RANGE	0x02
+#define	PART_KEYS_SETUP	0x04
 	u_int32_t	flags;
 } DB_PARTITION;
 
@@ -36,7 +37,14 @@ typedef struct __part_internal {
 
 #ifdef HAVE_PARTITION
 #define	PART_NAME	"__dbp.%s.%03d"
-#define	PART_LEN	(strlen("__dbp..")+3)
+/*
+ * Currently we support at most 1000000 partitions.
+ * If the limit is changed, PART_DIGITS and PART_MAXIMUM
+ * must be changed accordingly.
+ */
+#define	PART_DIGITS	6
+#define	PART_MAXIMUM	1000000
+#define	PART_LEN	(sizeof("__dbp..") + PART_DIGITS)
 #define	PART_PREFIX	"__dbp."
 #define IS_PARTITION_DB_FILE(name)	(strncmp(name, PART_PREFIX,	\
 					    sizeof(PART_PREFIX) - 1) == 0)
diff --git a/src/dbinc/perfmon.h b/src/dbinc/perfmon.h
index c3b9b9fa..e89eba33 100644
--- a/src/dbinc/perfmon.h
+++ b/src/dbinc/perfmon.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/qam.h b/src/dbinc/qam.h
index 657c11e2..d18f91f3 100644
--- a/src/dbinc/qam.h
+++ b/src/dbinc/qam.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/queue.h b/src/dbinc/queue.h
index 5a62741a..c53941ab 100644
--- a/src/dbinc/queue.h
+++ b/src/dbinc/queue.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1991, 1993
diff --git a/src/dbinc/region.h b/src/dbinc/region.h
index ac0ff16f..070aff5f 100644
--- a/src/dbinc/region.h
+++ b/src/dbinc/region.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -134,7 +134,10 @@ typedef enum {
 	REGION_TYPE_LOG,
 	REGION_TYPE_MPOOL,
 	REGION_TYPE_MUTEX,
-	REGION_TYPE_TXN } reg_type_t;
+	REGION_TYPE_TXN,
+	/* Must always be last: alias for the largest valid region type. */
+	REGION_TYPE_MAX = REGION_TYPE_TXN
+} reg_type_t;
 
 #define	INVALID_REGION_SEGID	-1	/* Segment IDs are either shmget(2) or
 					 * Win16 segment identifiers.  They are
@@ -196,10 +199,10 @@ typedef struct __db_reg_env { /* SHARED */
 
 
 	/*
-	 * The mtx_regenv mutex protects the environment reference count and
-	 * memory allocation from the primary shared region (the crypto, thread
-	 * control block and replication implementations allocate memory from
-	 * the primary shared region).
+	 * The mtx_regenv mutex protects the environment reference count,
+	 * blob threshold and memory allocation from the primary shared region
+	 * (the crypto, thread control block and replication implementations
+	 * allocate memory from the primary shared region).
 	 *
 	 * The rest of the fields are initialized at creation time, and don't
 	 * need mutex protection.  The flags, op_timestamp and rep_timestamp
@@ -209,6 +212,7 @@ typedef struct __db_reg_env { /* SHARED */
 	 */
 	db_mutex_t mtx_regenv;		/* Refcnt, region allocation mutex. */
 	u_int32_t  refcnt;		/* References to the environment. */
+	u_int32_t  blob_threshold;	/* Environment wide blob threshold. */
 
 	u_int32_t region_cnt;		/* Number of REGIONs. */
 	roff_t	  region_off;		/* Offset of region array */
@@ -227,6 +231,8 @@ typedef struct __db_reg_env { /* SHARED */
 	time_t	  op_timestamp;		/* Timestamp for operations. */
 	time_t	  rep_timestamp;	/* Timestamp for rep db handles. */
 	u_int32_t reg_panic;		/* DB_REGISTER triggered panic */
+	u_int32_t failure_panic;	/* Failchk or mutex lock saw a crash. */
+	char	  failure_symptom[DB_FAILURE_SYMPTOM_SIZE];
 	uintmax_t unused;		/* The ALLOC_LAYOUT structure follows
 					 * the REGENV structure in memory and
 					 * contains uintmax_t fields.  Force
@@ -308,11 +314,14 @@ struct __db_reginfo_t {		/* __env_region_attach IN parameters. */
 
 /*
  * PANIC_ISSET, PANIC_CHECK:
- *	Check to see if the DB environment is dead.
+ *	Check to see if the DB environment is dead. If the environment is still
+ *	attached to its regions, look in the REGENV. Otherwise, check whether
+ *	the region had the panic state set when this env detached from it.
  */
 #define	PANIC_ISSET(env)						\
-	((env) != NULL && (env)->reginfo != NULL &&			\
-	    ((REGENV *)(env)->reginfo->primary)->panic != 0 &&		\
+	((env) != NULL && ((env)->reginfo != NULL ?			\
+	    ((REGENV *)(env)->reginfo->primary)->panic != 0 :		\
+	    F_ISSET(env, ENV_REMEMBER_PANIC)) &&			\
 	    !F_ISSET((env)->dbenv, DB_ENV_NOPANIC))
 
 #define	PANIC_CHECK(env)						\
diff --git a/src/dbinc/rep.h b/src/dbinc/rep.h
index 75004239..f3bdf481 100644
--- a/src/dbinc/rep.h
+++ b/src/dbinc/rep.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -19,6 +19,7 @@ extern "C" {
  * Names of client temp databases.
  */
 #define	REPFILEPREFIX	"__db.rep"
+#define	REPBLOBNAME	"__db.rep.blob.db"
 #define	REPDBNAME	"__db.rep.db"
 #define	REPPAGENAME     "__db.reppg.db"
 
@@ -42,43 +43,58 @@ extern "C" {
 /*
  * Message types
  */
-#define	REP_INVALID	0	/* Invalid message type. */
-#define	REP_ALIVE	1	/* I am alive message. */
-#define	REP_ALIVE_REQ	2	/* Request for alive messages. */
-#define	REP_ALL_REQ	3	/* Request all log records greater than LSN. */
-#define	REP_BULK_LOG	4	/* Bulk transfer of log records. */
-#define	REP_BULK_PAGE	5	/* Bulk transfer of pages. */
-#define	REP_DUPMASTER	6	/* Duplicate master detected; propagate. */
-#define	REP_FILE	7	/* Page of a database file. NOTUSED */
-#define	REP_FILE_FAIL	8	/* File requested does not exist. */
-#define	REP_FILE_REQ	9	/* Request for a database file. NOTUSED */
-#define	REP_LEASE_GRANT	10	/* Client grants a lease to a master. */
-#define	REP_LOG		11	/* Log record. */
-#define	REP_LOG_MORE	12	/* There are more log records to request. */
-#define	REP_LOG_REQ	13	/* Request for a log record. */
-#define	REP_MASTER_REQ	14	/* Who is the master */
-#define	REP_NEWCLIENT	15	/* Announces the presence of a new client. */
-#define	REP_NEWFILE	16	/* Announce a log file change. */
-#define	REP_NEWMASTER	17	/* Announces who the master is. */
-#define	REP_NEWSITE	18	/* Announces that a site has heard from a new
-				 * site; like NEWCLIENT, but indirect.  A
-				 * NEWCLIENT message comes directly from the new
-				 * client while a NEWSITE comes indirectly from
-				 * someone who heard about a NEWSITE.
-				 */
-#define	REP_PAGE	19	/* Database page. */
-#define	REP_PAGE_FAIL	20	/* Requested page does not exist. */
-#define	REP_PAGE_MORE	21	/* There are more pages to request. */
-#define	REP_PAGE_REQ	22	/* Request for a database page. */
-#define	REP_REREQUEST	23	/* Force rerequest. */
-#define	REP_START_SYNC	24	/* Tell client to begin syncing a ckp.*/
-#define	REP_UPDATE	25	/* Environment hotcopy information. */
-#define	REP_UPDATE_REQ	26	/* Request for hotcopy information. */
-#define	REP_VERIFY	27	/* A log record for verification. */
-#define	REP_VERIFY_FAIL	28	/* The client is outdated. */
-#define	REP_VERIFY_REQ	29	/* Request for a log record to verify. */
-#define	REP_VOTE1	30	/* Send out your information for an election. */
-#define	REP_VOTE2	31	/* Send a "you are master" vote. */
+#define	REP_INVALID		0	/* Invalid message type. */
+#define	REP_ALIVE		1	/* I am alive message. */
+#define	REP_ALIVE_REQ		2	/* Request for alive messages. */
+#define	REP_ALL_REQ		3	/* Request all log records greater than
+					 * LSN. */
+#define	REP_BLOB_ALL_REQ	4	/* Request all the given blob files. */
+#define	REP_BLOB_CHUNK		5	/* A piece of data contained in a blob
+					 * file. */
+#define	REP_BLOB_CHUNK_REQ	6	/* Request a piece of data from a blob
+					 * file. */
+#define	REP_BLOB_UPDATE		7	/* A list of blob files for a
+					 * database. */
+#define	REP_BLOB_UPDATE_REQ	8	/* Request blob files. */
+#define	REP_BULK_LOG		9	/* Bulk transfer of log records. */
+#define	REP_BULK_PAGE		10	/* Bulk transfer of pages. */
+#define	REP_DUPMASTER		11	/* Duplicate master detected;
+					 * propagate. */
+#define	REP_FILE		12	/* Page of a database file. NOTUSED */
+#define	REP_FILE_FAIL		13	/* File requested does not exist. */
+#define	REP_FILE_REQ		14	/* Request for a database file.
+					 * NOTUSED */
+#define	REP_LEASE_GRANT		15	/* Client grants a lease to a master. */
+#define	REP_LOG			16	/* Log record. */
+#define	REP_LOG_MORE		17	/* There are more log records to
+					 * request. */
+#define	REP_LOG_REQ		18	/* Request for a log record. */
+#define	REP_MASTER_REQ		19	/* Who is the master */
+#define	REP_NEWCLIENT		20	/* Announces the presence of a new
+					 * client. */
+#define	REP_NEWFILE		21	/* Announce a log file change. */
+#define	REP_NEWMASTER		22	/* Announces who the master is. */
+#define	REP_NEWSITE		23	/* Announces that a site has heard from
+					 * a new site; like NEWCLIENT, but
+					 * indirect.  A NEWCLIENT message comes
+					 * directly from the new client while a
+					 * NEWSITE comes indirectly from
+					 * someone who heard about a NEWSITE.*/
+#define	REP_PAGE		24	/* Database page. */
+#define	REP_PAGE_FAIL		25	/* Requested page does not exist. */
+#define	REP_PAGE_MORE		26	/* There are more pages to request. */
+#define	REP_PAGE_REQ		27	/* Request for a database page. */
+#define	REP_REREQUEST		28	/* Force rerequest. */
+#define	REP_START_SYNC		29	/* Tell client to begin syncing a ckp.*/
+#define	REP_UPDATE		30	/* Environment hotcopy information. */
+#define	REP_UPDATE_REQ		31	/* Request for hotcopy information. */
+#define	REP_VERIFY		32	/* A log record for verification. */
+#define	REP_VERIFY_FAIL		33	/* The client is outdated. */
+#define	REP_VERIFY_REQ		34	/* Request for a log record to
+					 * verify. */
+#define	REP_VOTE1		35	/* Send out your information for an
+					 * election. */
+#define	REP_VOTE2		36	/* Send a "you are master" vote. */
 /*
  * Maximum message number for conversion tables.  Update this
  * value as the largest message number above increases.
@@ -90,7 +106,7 @@ extern "C" {
  * NOTE: When changing messages above, the two tables for upgrade support
  * need adjusting.  They are in rep_util.c.
  */
-#define	REP_MAX_MSG	31
+#define	REP_MAX_MSG	36
 
 /*
  * This is the list of client-to-client requests messages.
@@ -99,6 +115,8 @@ extern "C" {
  */
 #define	REP_MSG_REQ(rectype)			\
     (rectype == REP_ALL_REQ ||			\
+    rectype == REP_BLOB_ALL_REQ ||		\
+    rectype == REP_BLOB_CHUNK_REQ ||		\
     rectype == REP_LOG_REQ ||			\
     rectype == REP_PAGE_REQ ||			\
     rectype == REP_VERIFY_REQ)
@@ -125,6 +143,9 @@ extern "C" {
 #define	DB_LOGVERSION_51	17
 #define	DB_LOGVERSION_52	18
 #define	DB_LOGVERSION_53	19
+#define	DB_LOGVERSION_60	20
+#define	DB_LOGVERSION_60p1	21
+#define	DB_LOGVERSION_61	22
 #define	DB_LOGVERSION_MIN	DB_LOGVERSION_44
 #define	DB_REPVERSION_INVALID	0
 #define	DB_REPVERSION_44	3
@@ -132,11 +153,12 @@ extern "C" {
 #define	DB_REPVERSION_46	4
 #define	DB_REPVERSION_47	5
 #define	DB_REPVERSION_48	5
-#define	DB_REPVERSION_50	5
 #define	DB_REPVERSION_51	5
 #define	DB_REPVERSION_52	6
 #define	DB_REPVERSION_53	7
-#define	DB_REPVERSION		DB_REPVERSION_53
+#define	DB_REPVERSION_60	7
+#define	DB_REPVERSION_61	8
+#define	DB_REPVERSION		DB_REPVERSION_61
 #define	DB_REPVERSION_MIN	DB_REPVERSION_44
 
 /*
@@ -204,9 +226,20 @@ extern "C" {
 #define	REP_INITVERSION		3
 
 /*
+ * View/partial replication file name.
+ * The file is empty.  It exists as a permanent indicator that this
+ * environment can never be master.
+ */
+#define	REPVIEW		"__db.rep.view"
+#define	IS_VIEW_SITE(env)						\
+	(REP_ON(env) &&							\
+	    ((env)->rep_handle->region->stat.st_view != 0))
+
+/*
  * Database types for __rep_client_dbinit
  */
 typedef enum {
+	REP_BLOB,	/* Blob file database. */
 	REP_DB,		/* Log record database. */
 	REP_PG		/* Pg database. */
 } repdb_t;
@@ -239,7 +272,7 @@ typedef enum {
 typedef enum {
 	SYNC_OFF,	/* No recovery. */
 	SYNC_LOG,	/* Recovery - log. */
-	SYNC_PAGE,	/* Recovery - pages. */
+	SYNC_PAGE,	/* Recovery - pages and blobs. */
 	SYNC_UPDATE,	/* Recovery - update. */
 	SYNC_VERIFY 	/* Recovery - verify. */
 } repsync_t;
@@ -346,6 +379,17 @@ typedef struct __rep { /* SHARED */
 	u_int32_t	first_vers;	/* Log version of first log file. */
 	DB_LSN		last_lsn;	/* Latest LSN we need. */
 	/* These are protected by mtx_clientdb. */
+	db_seq_t	gap_bl_hi_id;	/* Last id in the blob gap. */
+	db_seq_t	gap_bl_hi_sid;	/* Last sid in the blob gap. */
+	off_t		gap_bl_hi_off;	/* Last offset in the blob gap. */
+	db_seq_t	last_blob_id;	/* Last id on the list to process. */
+	db_seq_t	last_blob_sid;	/* Last sid on the list to process. */
+	db_seq_t	prev_blob_id;	/* Previous last id on list. */
+	db_seq_t	prev_blob_sid;	/* Previous last sid on list. */
+	db_seq_t	highest_id;	/* Highest file id to request. */
+	u_int32_t	blob_more_files;/* More blob files to be processed. */
+	int		blob_sync;	/* Currently handling blobs. */
+	int		blob_rereq;	/* When to rereq a blob update msg. */
 	db_timespec	last_pg_ts;	/* Last page stored timestamp. */
 	db_pgno_t	ready_pg;	/* Next pg expected. */
 	db_pgno_t	waiting_pg;	/* First pg after gap. */
@@ -391,11 +435,13 @@ typedef struct __rep { /* SHARED */
 	roff_t		siteinfo_off;	/* Offset of site array region. */
 	u_int		site_cnt;	/* Array slots in use. */
 	u_int		site_max;	/* Total array slots allocated. */
+	u_int		sites_avail;	/* Total number of available sites. */
 	int		self_eid;	/* Where to find the local site. */
 	u_int		siteinfo_seq;	/* Number of updates to this info. */
 	u_int32_t	min_log_file;	/* Earliest log needed by repgroup. */
 
 	pid_t		listener;
+	u_int		listener_nthreads; /* # of msg threads in listener. */
 
 	int		perm_policy;
 	db_timeout_t	ack_timeout;
@@ -403,6 +449,11 @@ typedef struct __rep { /* SHARED */
 	db_timeout_t	connection_retry_wait;
 	db_timeout_t	heartbeat_frequency; /* Max period between msgs. */
 	db_timeout_t	heartbeat_monitor_timeout;
+	u_int32_t	inqueue_max_gbytes;
+	u_int32_t	inqueue_max_bytes;
+	u_int32_t	inqueue_rz_gbytes;
+	u_int32_t	inqueue_rz_bytes;
+	u_int32_t	inqueue_full_event_on;
 #endif  /* HAVE_REPLICATION_THREADS */
 
 	/* Statistics. */
@@ -419,12 +470,16 @@ typedef struct __rep { /* SHARED */
 #define	REP_C_2SITE_STRICT	0x00001		/* Don't cheat on elections. */
 #define	REP_C_AUTOINIT		0x00002		/* Auto initialization. */
 #define	REP_C_AUTOROLLBACK	0x00004		/* Discard client txns: sync. */
-#define	REP_C_BULK		0x00008		/* Bulk transfer. */
-#define	REP_C_DELAYCLIENT	0x00010		/* Delay client sync-up. */
-#define	REP_C_ELECTIONS		0x00020		/* Repmgr to use elections. */
-#define	REP_C_INMEM		0x00040		/* In-memory replication. */
-#define	REP_C_LEASE		0x00080		/* Leases configured. */
-#define	REP_C_NOWAIT		0x00100		/* Immediate error return. */
+#define	REP_C_AUTOTAKEOVER	0x00008		/* Auto listener take over. */
+#define	REP_C_BULK		0x00010		/* Bulk transfer. */
+#define	REP_C_DELAYCLIENT	0x00020		/* Delay client sync-up. */
+#define	REP_C_ELECT_LOGLENGTH	0x00040		/* Log length wins election. */
+#define	REP_C_ELECTIONS		0x00080		/* Repmgr to use elections. */
+#define	REP_C_INMEM		0x00100		/* In-memory replication. */
+#define	REP_C_LEASE		0x00200		/* Leases configured. */
+#define	REP_C_NOWAIT		0x00400		/* Immediate error return. */
+#define	REP_C_PREFMAS_CLIENT	0x00800		/* Preferred master client. */
+#define	REP_C_PREFMAS_MASTER	0x01000		/* Preferred master site. */
 	u_int32_t	config;		/* Configuration flags. */
 
 	/* Election. */
@@ -455,15 +510,17 @@ typedef struct __rep { /* SHARED */
 #define	REP_F_CLIENT		0x00000008	/* Client replica. */
 #define	REP_F_DELAY		0x00000010	/* Delaying client sync-up. */
 #define	REP_F_GROUP_ESTD	0x00000020	/* Rep group is established. */
-#define	REP_F_INUPDREQ		0x00000040	/* Thread in rep_update_req. */
-#define	REP_F_LEASE_EXPIRED	0x00000080	/* Leases guaranteed expired. */
-#define	REP_F_MASTER		0x00000100	/* Master replica. */
-#define	REP_F_MASTERELECT	0x00000200	/* Master elect. */
-#define	REP_F_NEWFILE		0x00000400	/* Newfile in progress. */
-#define	REP_F_NIMDBS_LOADED	0x00000800	/* NIMDBs are materialized. */
-#define	REP_F_SKIPPED_APPLY	0x00001000	/* Skipped applying a record. */
-#define	REP_F_START_CALLED	0x00002000	/* Rep_start called. */
-#define	REP_F_SYS_DB_OP		0x00004000	/* Operation in progress. */
+#define	REP_F_HOLD_GEN		0x00000040	/* PrefMas startup hold gen. */
+#define	REP_F_INUPDREQ		0x00000080	/* Thread in rep_update_req. */
+#define	REP_F_LEASE_EXPIRED	0x00000100	/* Leases guaranteed expired. */
+#define	REP_F_MASTER		0x00000200	/* Master replica. */
+#define	REP_F_MASTERELECT	0x00000400	/* Master elect. */
+#define	REP_F_NEWFILE		0x00000800	/* Newfile in progress. */
+#define	REP_F_NIMDBS_LOADED	0x00001000	/* NIMDBs are materialized. */
+#define	REP_F_READONLY_MASTER	0x00002000	/* PrefMas readonly master. */
+#define	REP_F_SKIPPED_APPLY	0x00004000	/* Skipped applying a record. */
+#define	REP_F_START_CALLED	0x00008000	/* Rep_start called. */
+#define	REP_F_SYS_DB_OP		0x00010000	/* Operation in progress. */
 	u_int32_t	flags;
 } REP;
 
@@ -525,7 +582,7 @@ do {									\
 /*
  * REP_F_EPHASE0 is not a *real* election phase.  It is used for
  * master leases and allowing the client to find the master or
- * expire its lease.  However, EPHASE0 is cleared by __rep_elect_done.
+ * expire its lease.
  */
 #define	IN_ELECTION(R)							\
 	FLD_ISSET((R)->elect_flags, REP_E_PHASE1 | REP_E_PHASE2)
@@ -594,6 +651,22 @@ do {									\
 } while (0)
 
 
+/* Macros to determine current replication configuration options. */
+#define REP_CONFIG_IS_SET(env, flags) 					\
+	(REP_ON(env) ? 							\
+	FLD_ISSET(((env)->rep_handle->region)->config, flags) : 	\
+	FLD_ISSET(((env)->rep_handle)->config, flags))
+#ifdef HAVE_REPLICATION_THREADS
+#define PREFMAS_IS_SET(env) 						\
+	(REP_CONFIG_IS_SET(env, 					\
+	(REP_C_PREFMAS_MASTER | REP_C_PREFMAS_CLIENT)))
+#else
+#define PREFMAS_IS_SET(env)	0
+#endif
+#define IS_PREFMAS_MODE(env)						\
+	(REP_ON(env) && PREFMAS_IS_SET(env) &&				\
+	((env)->rep_handle->region)->config_nsites < 3)
+
 /*
  * Gap processing flags.  These provide control over the basic
  * gap processing algorithm for some special cases.
@@ -603,11 +676,28 @@ do {									\
 					/* REREQUEST is a superset of FORCE. */
 
 /*
+ * Internal options for rep_start_int().  These are used by preferred master
+ * mode to help coordinate between the sites during changes of master.
+ */
+#define	REP_START_FORCE_ROLECHG	0x001	/* Force role change to advance gen. */
+#define	REP_START_HOLD_CLIGEN	0x002	/* Hold client gen before doing
+					 * lsnhist match. */
+#define	REP_START_WAIT_LOCKMSG	0x004	/* Wait for REP_LOCKOUT_MSG. */
+
+/*
  * Flags indicating what kind of record we want to back up to, in the log.
  */
-#define	REP_REC_COMMIT		0x001 	/* Most recent commit record. */
-#define	REP_REC_PERM		0x002	/* Most recent perm record. */
+#define REP_REC_COMMIT		0x001   /* Most recent commit record. */
+#define REP_REC_PERM		0x002   /* Most recent perm record. */
 					/* PERM is a superset of COMMIT. */
+#define REP_REC_PERM_DEL	0x004   /* Most recent PERM, or fail if a
+					 * file delete is found first. */
+
+/*
+ * Permanent record types.
+ */
+#define	IS_PERM_RECTYPE(rectype)					\
+    ((rectype) == DB___txn_regop || (rectype) == DB___txn_ckp)
 
 /*
  * Basic pre/post-amble processing.
@@ -692,7 +782,7 @@ do {									\
  * machine instruction.  A single 32-bit integer value is safe without a
  * mutex, but most other types of value should use a mutex.
  *
- * Any use of a mutex must be inside a matched pair of ENV_ENTER() and
+ * Use of a db_mutex_t mutex must be inside a matched pair of ENV_ENTER() and
  * ENV_LEAVE() macros.  This ensures that if a thread dies while holding
  * a lock (i.e. a mutex), recovery can clean it up so that it does not
  * indefinitely block other threads.
@@ -727,6 +817,9 @@ struct __db_rep {
 	/*
 	 * End of shared configuration information.
 	 */
+	int		(*partial)	/* View/partial replication function. */
+			    __P((DB_ENV *, const char *, int *, u_int32_t));
+
 	int		(*send)		/* Send function. */
 			    __P((DB_ENV *, const DBT *, const DBT *,
 			    const DB_LSN *, int, u_int32_t));
@@ -745,6 +838,7 @@ struct __db_rep {
 	DB_MPOOLFILE	*file_mpf;	/* Mpoolfile for current database. */
 	DB		*file_dbp;	/* This file's page info. */
 	DBC		*queue_dbc;	/* Dbc for a queue file. */
+	DB		*blob_dbp;	/* Blob file database. */
 
 	/*
 	 * Please change __rep_print_all (rep_stat.c) to track any changes made
@@ -759,6 +853,7 @@ struct __db_rep {
 	/*
 	 * Replication Framework (repmgr) per-process information.
 	 */
+	int		config_nthreads;/* Configured msg processing threads. */
 	u_int		nthreads;	/* Msg processing threads. */
 	u_int		athreads;	/* Space allocated for msg threads. */
 	u_int		non_rep_th;	/* Threads in GMDB or channel msgs. */
@@ -771,10 +866,13 @@ struct __db_rep {
 	db_timeout_t	connection_retry_wait;
 	db_timeout_t	heartbeat_frequency; /* Max period between msgs. */
 	db_timeout_t	heartbeat_monitor_timeout;
+	u_int32_t	inqueue_max_gbytes;
+	u_int32_t	inqueue_max_bytes;
 
 	/* Thread synchronization. */
 	REPMGR_RUNNABLE *selector, **messengers, **elect_threads;
 	REPMGR_RUNNABLE	*preferred_elect_thr;
+	REPMGR_RUNNABLE	*takeover_thread;
 	db_timespec	repstart_time;
 	mgr_mutex_t	*mutex;
 	cond_var_t	check_election, gmdb_idle, msg_avail;
@@ -799,12 +897,18 @@ struct __db_rep {
 	CONNECTION_LIST	connections;
 	RETRY_Q_HEADER	retries;	/* Sites needing connection retry. */
 	struct {
-		int	size;
+		u_int32_t gbytes;
+		u_int32_t bytes;
 		STAILQ_HEAD(__repmgr_q_header, __repmgr_message) header;
 	} input_queue;
 
 	socket_t	listen_fd;
 	db_timespec	last_bcast;	/* Time of last broadcast msg. */
+	db_timespec	last_hbeat;	/* Time of last heartbeat (prefmas). */
+	db_timespec	l_listener_chk; /* Time to check local listener. */
+	db_timeout_t	l_listener_wait;/* Timeout to check local listener. */
+	db_timespec	m_listener_chk; /* Time to check master listener. */
+	db_timeout_t	m_listener_wait;/* Timeout to check master listener. */
 
 	/*
 	 * Status of repmgr.  It is ready when repmgr is not yet started.  It
@@ -813,12 +917,15 @@ struct __db_rep {
 	 */
 	enum { ready, running, stopped } repmgr_status;
 	int		new_connection;	  /* Since last master seek attempt. */
+	int		demotion_pending; /* We're being demoted to a view. */
 	int		takeover_pending; /* We've been elected master. */
+	int		rejoin_pending; /* Join group retry after rejection. */
 	int		gmdb_busy;
 	int		client_intent;	/* Will relinquish master role. */
 	int		gmdb_dirty;
 	int		have_gmdb;
 	int		seen_repmsg;
+	int		view_mismatch; /* View callback and gmdb don't match. */
 
 	/*
 	 * Flag to show what kind of transaction is currently in progress.
@@ -854,6 +961,16 @@ struct __db_rep {
 	u_int8_t	*restored_list;
 	size_t		restored_list_length;
 
+	/*
+	 * Preferred master mode indicator for a pending action.  A
+	 * master_switch is initiated when the preferred master site is
+	 * ready to take over as master.  A start_temp_master is initiated
+	 * when the client site needs to start as the temporary master.
+	 */
+	enum { no_action, master_switch, start_temp_master } prefmas_pending;
+	/* The LSN at the very beginning of preferred master site startup. */
+	DB_LSN prefmas_init_lsn;
+
 	/* Application's message dispatch call-back function. */
 	void  (*msg_dispatch) __P((DB_ENV *, DB_CHANNEL *,
 		DBT *, u_int32_t, u_int32_t));
@@ -920,6 +1037,10 @@ struct __db_rep {
 	} else if (!F_ISSET((env)->rep_handle, DBREP_APP_REPMGR))	\
 		F_SET((env)->rep_handle, DBREP_APP_BASEAPI);		\
 } while (0)
+#define	ADJUST_AUTOTAKEOVER_WAITS(db_rep, timeout) do {			\
+	(db_rep)->l_listener_wait = (timeout);	/* Local listener. */	\
+	(db_rep)->m_listener_wait = 3 * (timeout); /* Master: 3x. */	\
+} while (0)
 
 #else
 /*
@@ -935,6 +1056,9 @@ struct __db_rep {
 #define	APP_SET_BASEAPI(env) do {					\
 	;								\
 } while (0)
+#define	ADJUST_AUTOTAKEOVER_WAITS(db_rep, timeout) do {			\
+	;								\
+} while (0)
 #endif  /* HAVE_REPLICATION_THREADS */
 
 /*
@@ -945,22 +1069,27 @@ struct __db_rep {
  * compatibility with old versions, these values must be reserved explicitly in
  * the list of flag values (below)
  */
-#define	DB_LOG_PERM_42_44	0x20
-#define	DB_LOG_RESEND_42_44	0x40
-#define	REPCTL_INIT_45		0x02	/* Back compatible flag value. */
-
-#define	REPCTL_ELECTABLE	0x01	/* Upgraded client is electable. */
-#define	REPCTL_FLUSH		0x02	/* Record should be flushed. */
-#define	REPCTL_GROUP_ESTD	0x04	/* Message from site in a group. */
-#define	REPCTL_INIT		0x08	/* Internal init message. */
-#define	REPCTL_LEASE		0x10	/* Lease related message.. */
+#define	DB_LOG_PERM_42_44	0x020
+#define	DB_LOG_RESEND_42_44	0x040
+#define	REPCTL_INIT_45		0x002	/* Back compatible flag value. */
+
+/*
+ * Add new REPCTL flags to the end of this list to preserve compatibility
+ * with old versions.
+ */
+#define	REPCTL_ELECTABLE	0x001	/* Upgraded client is electable. */
+#define	REPCTL_FLUSH		0x002	/* Record should be flushed. */
+#define	REPCTL_GROUP_ESTD	0x004	/* Message from site in a group. */
+#define	REPCTL_INIT		0x008	/* Internal init message. */
+#define	REPCTL_LEASE		0x010	/* Lease related message. */
 			/*
 			 * Skip over reserved values 0x20
 			 * and 0x40, as explained above.
 			 */
-#define	REPCTL_LOG_END		0x80	/* Approximate end of group-wide log. */
+#define	REPCTL_LOG_END		0x080	/* Approximate end of group-wide log. */
 #define	REPCTL_PERM		DB_LOG_PERM_42_44
 #define	REPCTL_RESEND		DB_LOG_RESEND_42_44
+#define	REPCTL_INMEM_ONLY	0x100	/* In-memory databases only. */
 
 /*
  * File info flags for internal init.  The per-database (i.e., file) flag
@@ -1094,6 +1223,20 @@ typedef struct {
 	DBT		*objs;
 } linfo_t;
 
+/*
+ * Used to store information on the child transaction that opens a blob meta
+ * database.  In partial replication processing the child transaction of the
+ * blob meta database must be delayed until after processing the child
+ * transaction that opens the database that owns the BMD.
+ */
+typedef struct {
+	db_seq_t blob_file_id;
+	DB_LSN lsn;
+	u_int32_t child;
+	void *next;
+	void *prev;
+} DELAYED_BLOB_LIST;
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/src/dbinc/repmgr.h b/src/dbinc/repmgr.h
index d8fd199c..a38defa2 100644
--- a/src/dbinc/repmgr.h
+++ b/src/dbinc/repmgr.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -47,20 +47,29 @@ extern "C" {
  * In protocol version one there were only three message types: 1, 2, and 3; so
  * 3 was the max.  In protocol version 2 we introduced heartbeats, type 4.
  * (Protocol version 3 did not introduce any new message types.)  In version 4
- * we introduced a few more new message types, the largest of which had value 7.
+ * we introduced a few more new message types, the largest of which had value 8.
+ * Protocol version 5 did not introduce any new message types, but changed
+ * the format of site info and membership data to support views.
+ *
+ * Protocol version 6 introduced preferred master mode, which added several
+ * new REPMGR_OWN messages.
  */
 #define	REPMGR_MAX_V1_MSG_TYPE	3
 #define	REPMGR_MAX_V2_MSG_TYPE	4
 #define	REPMGR_MAX_V3_MSG_TYPE	4
 #define	REPMGR_MAX_V4_MSG_TYPE	8
+#define	REPMGR_MAX_V5_MSG_TYPE	8
+#define	REPMGR_MAX_V6_MSG_TYPE	8
 #define	HEARTBEAT_MIN_VERSION	2
 #define	CHANNEL_MIN_VERSION	4
 #define	CONN_COLLISION_VERSION	4
 #define	GM_MIN_VERSION		4
 #define	OWN_MIN_VERSION		4
+#define	VIEW_MIN_VERSION	5
+#define	PREFMAS_MIN_VERSION	6
 
 /* The range of protocol versions we're willing to support. */
-#define	DB_REPMGR_VERSION	4
+#define	DB_REPMGR_VERSION	6
 #define	DB_REPMGR_MIN_VERSION	1
 
 /*
@@ -73,18 +82,30 @@ extern "C" {
  * Like the message format types, these message type values should be
  * permanently frozen.
  */
-#define	REPMGR_CONNECT_REJECT	1
-#define	REPMGR_GM_FAILURE	2
-#define	REPMGR_GM_FORWARD	3
-#define	REPMGR_JOIN_REQUEST	4
-#define	REPMGR_JOIN_SUCCESS	5
-#define	REPMGR_PARM_REFRESH	6
-#define	REPMGR_REJOIN		7
-#define	REPMGR_REMOVE_REQUEST	8
-#define	REPMGR_REMOVE_SUCCESS	9
-#define	REPMGR_RESOLVE_LIMBO	10
-#define	REPMGR_SHARING		11
-
+#define	REPMGR_CONNECT_REJECT		1
+#define	REPMGR_GM_FAILURE		2
+#define	REPMGR_GM_FORWARD		3
+#define	REPMGR_JOIN_REQUEST		4
+#define	REPMGR_JOIN_SUCCESS		5
+#define	REPMGR_PARM_REFRESH		6
+#define	REPMGR_REJOIN			7
+#define	REPMGR_REMOVE_REQUEST		8
+#define	REPMGR_REMOVE_SUCCESS		9
+#define	REPMGR_RESOLVE_LIMBO		10
+#define	REPMGR_SHARING			11
+#define	REPMGR_LSNHIST_REQUEST		12
+#define	REPMGR_LSNHIST_RESPONSE		13
+#define	REPMGR_PREFMAS_FAILURE		14
+#define	REPMGR_PREFMAS_SUCCESS		15
+#define	REPMGR_READONLY_MASTER		16
+#define	REPMGR_READONLY_RESPONSE	17
+#define	REPMGR_RESTART_CLIENT		18
+
+/* Detect inconsistencies between view callback and site's gmdb. */
+#define PARTICIPANT_TO_VIEW(db_rep, site)      				\
+	((db_rep)->partial && !FLD_ISSET((site)->gmdb_flags, SITE_VIEW))
+#define VIEW_TO_PARTICIPANT(db_rep, site)      				\
+	(!(db_rep)->partial && FLD_ISSET((site)->gmdb_flags, SITE_VIEW))
 
 struct __repmgr_connection;
     typedef struct __repmgr_connection REPMGR_CONNECTION;
@@ -98,7 +119,8 @@ struct __cond_waiters_table;
     typedef struct __cond_waiters_table COND_WAITERS_TABLE;
 
 /* Current Group Membership DB format ID. */
-#define	REPMGR_GMDB_FMT_VERSION	1
+#define	REPMGR_GMDB_FMT_VERSION		2
+#define	REPMGR_GMDB_FMT_MIN_VERSION	1
 
 #ifdef DB_WIN32
 typedef SOCKET socket_t;
@@ -151,6 +173,17 @@ typedef char SITE_STRING_BUFFER[MAX_SITE_LOC_STRING+1];
 #define	DB_REPMGR_DEFAULT_ELECTION_RETRY	(10 * US_PER_SEC)
 #define	DB_REPMGR_DEFAULT_CHANNEL_TIMEOUT	(5 * US_PER_SEC)
 
+/* Default preferred master automatic configuration values. */
+#define	DB_REPMGR_PREFMAS_ELECTION_RETRY	(1 * US_PER_SEC)
+#define	DB_REPMGR_PREFMAS_HEARTBEAT_MONITOR	(2 * US_PER_SEC)
+#define	DB_REPMGR_PREFMAS_HEARTBEAT_SEND	(75 * (US_PER_SEC / 100))
+#define	DB_REPMGR_PREFMAS_PRIORITY_CLIENT	75
+#define	DB_REPMGR_PREFMAS_PRIORITY_MASTER	200
+
+/* Defaults for the undocumented incoming message queue limits. */
+#define	DB_REPMGR_DEFAULT_INQUEUE_MAX		(100 * MEGABYTE)
+#define	DB_REPMGR_INQUEUE_REDZONE_PERCENT	85
+
 typedef TAILQ_HEAD(__repmgr_conn_list, __repmgr_connection) CONNECTION_LIST;
 typedef STAILQ_HEAD(__repmgr_out_q_head, __queued_output) OUT_Q_HEADER;
 typedef TAILQ_HEAD(__repmgr_retry_q, __repmgr_retry) RETRY_Q_HEADER;
@@ -170,14 +203,20 @@ struct __repmgr_runnable {
 /*
  * Options governing requested behavior of election thread.
  */
-#define	ELECT_F_EVENT_NOTIFY	0x01 /* Notify application of master failure. */
-#define	ELECT_F_FAST		0x02 /* First election "fast" (n-1 trick). */
-#define	ELECT_F_IMMED		0x04 /* Start with immediate election. */
-#define	ELECT_F_INVITEE		0x08 /* Honor (remote) inviter's nsites. */
-#define	ELECT_F_STARTUP		0x10 /* Observe repmgr_start() policy. */
+#define	ELECT_F_CLIENT_RESTART	0x01 /* Do client restarts but no elections. */
+#define	ELECT_F_EVENT_NOTIFY	0x02 /* Notify application of master failure. */
+#define	ELECT_F_FAST		0x04 /* First election "fast" (n-1 trick). */
+#define	ELECT_F_IMMED		0x08 /* Start with immediate election. */
+#define	ELECT_F_INVITEE		0x10 /* Honor (remote) inviter's nsites. */
+#define	ELECT_F_STARTUP		0x20 /* Observe repmgr_start() policy. */
 		u_int32_t flags;
 
-		int eid;	/* For Connector thread. */
+		/* For connector thread. */
+		struct {
+			int eid;
+#define CONNECT_F_REFRESH	0x01 /* New connection to replace old one. */
+			u_int32_t flags;
+		} conn_th;
 
 		/*
 		 * Args for other thread types can be added here in the future
@@ -265,6 +304,7 @@ struct __queued_output {
  */
 typedef struct __repmgr_message {
 	STAILQ_ENTRY(__repmgr_message) entries;
+	size_t size;
 	__repmgr_msg_hdr_args msg_hdr;
 	union {
 		struct {
@@ -343,6 +383,7 @@ struct __repmgr_connection {
 #define	CONN_PARAMETERS	5	/* Awaiting parameters handshake. */
 #define	CONN_READY	6	/* Everything's fine. */
 	int state;
+	u_int32_t auto_takeover;/* Connection to remote listener candidate. */
 
 	/*
 	 * Input: while we're reading a message, we keep track of what phase
@@ -464,6 +505,8 @@ typedef struct {
 	SITEADDR addr;		/* Unprocessed network address of site. */
 	u_int32_t config;	/* Configuration flags: peer, helper, etc. */
 	u_int32_t status;	/* Group membership status. */
+	u_int32_t flags;	/* Group membership flags. */
+	u_int32_t listener_cand;/* Number of listener candidates of site. */
 } SITEINFO;
 
 /*
@@ -489,6 +532,42 @@ typedef struct {
 	     ((u_int)i) < db_rep->site_cnt;		 \
 	     (int)(++(i)) == db_rep->self_eid ? ++(i) : i)
 
+/*
+ * Enable replication manager auto listener takeover.
+ */
+#define	HAVE_REPLICATION_LISTENER_TAKEOVER	1
+
+/* Listener candidate, that is, a subordinate rep-aware process. */
+#define	IS_LISTENER_CAND(db_rep)					\
+	(FLD_ISSET((db_rep)->region->config, REP_C_AUTOTAKEOVER) &&	\
+	    IS_SUBORDINATE(db_rep) && (db_rep)->repmgr_status == running)
+
+/*
+ * The number of listener candidates for each remote site is maintained in
+ * the listener process and used in subordinate rep-aware processes.
+ */
+#define	SET_LISTENER_CAND(cond, op)					\
+	do {								\
+		if (FLD_ISSET(rep->config, REP_C_AUTOTAKEOVER) &&	\
+		    !IS_SUBORDINATE(db_rep) && (cond)) {		\
+			MUTEX_LOCK(env, rep->mtx_repmgr);		\
+			sites = R_ADDR(env->reginfo, rep->siteinfo_off);\
+			(sites[eid].listener_cand)op;			\
+			MUTEX_UNLOCK(env, rep->mtx_repmgr);		\
+		}							\
+	} while (0)
+
+#define	CHECK_LISTENER_CAND(val, op, tval, fval)			\
+	do {								\
+		if (IS_LISTENER_CAND(db_rep)) {				\
+			MUTEX_LOCK(env, rep->mtx_repmgr);		\
+			sites = R_ADDR(env->reginfo, rep->siteinfo_off);\
+			val = ((sites[eid].listener_cand)op) ?		\
+			    (tval) : (fval);				\
+			MUTEX_UNLOCK(env, rep->mtx_repmgr);		\
+		}							\
+	} while (0)
+
 struct __repmgr_site {
 	repmgr_netaddr_t net_addr;
 
@@ -499,12 +578,14 @@ struct __repmgr_site {
 	 * host/port network address is promised to be associated with the
 	 * locally known EID for the life of the environment.
 	 */
-	u_int32_t	membership; /* Status flags from GMDB. */
+	u_int32_t	membership; /* Status value from GMDB. */
+	u_int32_t	gmdb_flags; /* Flags from GMDB. */
 	u_int32_t	config;	    /* Flags from site->set_config() */
 
 	/*
 	 * Everything below here is applicable only to remote sites.
 	 */
+	u_int32_t max_ack_gen;	/* Master generation for max_ack. */
 	DB_LSN max_ack;		/* Best ack we've heard from this site. */
 	int ack_policy;		/* Or 0 if unknown. */
 	u_int16_t alignment;	/* Requirements for app channel msgs. */
@@ -604,11 +685,11 @@ struct __channel {
  * connections may be found: (1) SITE->ref.conn, (2) SITE->sub_conns, and
  * (3) db_rep->connections.
  *
- * 1. SITE->ref.conn points to our connection with the main process running
- * at the given site, if such a connection exists.  We may have initiated
- * the connection to the site ourselves, or we may have received it as an
- * incoming connection.  Once it is established there is very little
- * difference between those two cases.
+ * 1. SITE->ref.conn points to our connection with the listener process
+ * running at the given site, if such a connection exists.  We may have
+ * initiated the connection to the site ourselves, or we may have received
+ * it as an incoming connection.  Once it is established there is very
+ * little difference between those two cases.
  *
  * 2. SITE->sub_conns is a list of connections we have with subordinate
  * processes running at the given site.  There can be any number of these
@@ -694,6 +775,7 @@ struct __channel {
  */
 #define	APP_CHANNEL_CONNECTION	0x02	/* Connection used for app channel. */
 #define	ELECTABLE_SITE		0x04
+#define	REPMGR_AUTOTAKEOVER	0x08	/* Could become main connection. */
 #define	REPMGR_SUBORDINATE	0x01	/* This is a subordinate connection. */
 
 /*
@@ -719,13 +801,20 @@ typedef struct {
  * As with message formats, stored formats are defined in repmgr.msg.
  */
 /*
- * Flags for the Group Membership data portion of a record.  Like message type
- * codes, these values are frozen across releases, in order to avoid pointless
- * churn.
+ * Status values for the Group Membership data portion of a record.  Like
+ * message type codes, these values are frozen across releases, in order to
+ * avoid pointless churn.  These values are mutually exclusive.
  */
 #define	SITE_ADDING	0x01
 #define	SITE_DELETING	0x02
 #define	SITE_PRESENT	0x04
+/*
+ * Flags for the Group Membership data portion of a record.  These values are
+ * also frozen across releases.  These values are bit fields and may be OR'ed
+ * together.
+ */
+#define	SITE_VIEW		0x01
+#define	SITE_JOIN_ELECTABLE	0x02
 
 /*
  * Message types whose processing could take a long time.  We're careful to
@@ -755,9 +844,9 @@ typedef struct {
  * fraction of the code, it's a tiny fraction of the time: repmgr spends most of
  * its time in a call to select(), and as well a bit in calls into the Base
  * replication API.  All of those release the mutex.
- *     Access to repmgr's shared list of site addresses is protected by
- * another mutex: mtx_repmgr.  And, when changing space allocation for that site
- * list we conform to the convention of acquiring renv->mtx_regenv.  These are
+ *     Access to repmgr's shared values is protected by another mutex:
+ * mtx_repmgr.  And, when changing space allocation for the shared site list
+ * we conform to the convention of acquiring renv->mtx_regenv.  These are
  * less frequent of course.
  *     When it's necessary to acquire more than one of these mutexes, the
  * ordering priority (or "lock ordering protocol") is:
diff --git a/src/dbinc/shqueue.h b/src/dbinc/shqueue.h
index 22464462..20e0fae7 100644
--- a/src/dbinc/shqueue.h
+++ b/src/dbinc/shqueue.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -140,6 +140,17 @@ struct {								\
 	((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)))
 
   /*
+   * __SH_LIST_WAS_EMPTY is private API.  SH_LIST_FIRST is not thread-safe;
+   * the slh_first field could be evaluated multiple times if the optimizer
+   * does not eliminate the second load.  __SH_LIST_WAS_EMPTY tests whether a
+   * prior call of SH_LIST_FIRSTP occurred while the list was empty; i.e., its
+   * relative offset was -1.  It is thread-safe to call SH_LIST_FIRSTP and then
+   * test the resulting pointer with __SH_LIST_WAS_EMPTY.
+   */
+#define	__SH_LIST_WAS_EMPTY(head, ptr)					\
+	((u_int8_t *)(ptr) == (((u_int8_t *)(head)) + (-1)))
+
+  /*
    *__SH_LIST_PREV_OFF is private API.  It calculates the address of
    * the elm->field.sle_next member of a SH_LIST structure.  All offsets
    * between elements are relative to that point in SH_LIST structures.
diff --git a/src/dbinc/tcl_db.h b/src/dbinc/tcl_db.h
index 4c56164f..99992467 100644
--- a/src/dbinc/tcl_db.h
+++ b/src/dbinc/tcl_db.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -16,7 +16,7 @@ extern "C" {
 #define	MSG_SIZE 100		/* Message size */
 
 enum INFOTYPE {
-	I_AUX, I_DB, I_DBC, I_ENV, I_LOCK, I_LOGC, I_MP, I_NDBM, I_PG, I_SEQ, I_TXN};
+	I_AUX, I_DB, I_DBC, I_DBSTREAM, I_ENV, I_LOCK, I_LOGC, I_MP, I_NDBM, I_PG, I_SEQ, I_TXN};
 
 #define	MAX_ID		8	/* Maximum number of sub-id's we need */
 #define	DBTCL_PREP	64	/* Size of txn_recover preplist */
@@ -24,9 +24,11 @@ enum INFOTYPE {
 #define	DBTCL_DBM	1
 #define	DBTCL_NDBM	2
 
-#define	DBTCL_GETCLOCK		0
-#define	DBTCL_GETLIMIT		1
-#define	DBTCL_GETREQ		2
+#define	DBTCL_GETCLOCK			0
+#define	DBTCL_GETINQUEUE_MAX		1
+#define	DBTCL_GETINQUEUE_REDZONE	2
+#define	DBTCL_GETLIMIT			3	
+#define	DBTCL_GETREQ			4
 
 #define	DBTCL_MUT_ALIGN	0
 #define	DBTCL_MUT_INCR	1
@@ -36,9 +38,11 @@ enum INFOTYPE {
 
 /*
  * Data structure to record information about events that have occurred.  Tcl
- * command "env event_info" can retrieve the information.  For now, we record
- * only one occurrence per event type; "env event_info -clear" can be used to
- * reset the info.
+ * command "env event_info" can retrieve all the information except the event
+ * counts, and "env event_count" can retrieve the number of times a specific
+ * event has fired.  We added "env event_count" instead of merging the count
+ * information into "env event_info" to avoid breaking the existing tests.
+ * Tcl command "env event_info -clear" can be used to reset the info.
  *
  * Besides the bit flag that records the fact that an event type occurred, some
  * event types have associated "info" and we record that here too.  When new
@@ -47,16 +51,17 @@ enum INFOTYPE {
  * with the "env event_info" results.
  */
 typedef struct dbtcl_event_info {
-	u_int32_t	events;	/* Bit flag on for each event fired. */
-	int		panic_error;
-	int		newmaster_eid;
-	int		added_eid;
-	int		removed_eid;
-	pid_t		attached_process;
-	int		connected_eid;
+	u_int32_t	  events;	/* Bit flag on for each event fired. */
+	int		  panic_error;
+	int		  newmaster_eid;
+	int		  added_eid;
+	int		  removed_eid;
+	pid_t		  attached_process;
+	int		  connected_eid;
 	DB_REPMGR_CONN_ERR conn_broken_info;
 	DB_REPMGR_CONN_ERR conn_failed_try_info;
-	DB_LSN		sync_point;
+	DB_LSN		  sync_point;
+	size_t		  count[32]; /* The number of times for each event. */
 } DBTCL_EVENT_INFO;
 
 /*
@@ -99,6 +104,7 @@ typedef struct dbtcl_info {
 		DB_LOCK *lock;
 		DB_LOGC *logc;
 		DB_MPOOLFILE *mp;
+		DB_STREAM *dbsp;
 		DB_TXN *txnp;
 		void *anyp;
 	} un;
@@ -128,6 +134,7 @@ typedef struct dbtcl_info {
 	Tcl_Obj *i_isalive;
 	Tcl_Obj *i_part_callback;
 	Tcl_Obj *i_rep_send;
+	Tcl_Obj *i_rep_view;
 	Tcl_Obj *i_second_call;
 
 	/* Environment ID for the i_rep_send callback. */
@@ -144,6 +151,7 @@ typedef struct dbtcl_info {
 #define	i_anyp un.anyp
 #define	i_dbp un.dbp
 #define	i_dbcp un.dbcp
+#define	i_dbsp un.dbsp
 #define	i_envp un.envp
 #define	i_lock un.lock
 #define	i_logc un.logc
@@ -170,6 +178,8 @@ typedef struct dbtcl_info {
 
 #define	i_dbdbcid i_otherid[0]
 
+#define	i_dbcdbsid i_otherid[0]
+
 extern int __debug_on, __debug_print, __debug_stop, __debug_test;
 
 typedef struct dbtcl_global {
@@ -202,6 +212,7 @@ extern DBTCL_GLOBAL __dbtcl_global;
  * functions this will typically go before the "free" function to free the
  * stat structure returned by DB.
  */
+#ifdef HAVE_STATISTICS
 #define	MAKE_STAT_LIST(s, v) do {					\
 	result = _SetListElemInt(interp, res, (s), (long)(v));		\
 	if (result != TCL_OK)						\
@@ -213,6 +224,11 @@ extern DBTCL_GLOBAL __dbtcl_global;
 	if (result != TCL_OK)						\
 		goto error;						\
 } while (0)
+#else
+/* These do-nothing versions streamline the code & reduce warning messages. */
+#define	MAKE_STAT_LIST(s, v)	do { if (0) goto error; } while (0)
+#define	MAKE_WSTAT_LIST(s, v)	do { if (0) goto error; } while (0)
+#endif
 
 /*
  * MAKE_STAT_LSN appends a {name {LSNfile LSNoffset}} pair to a result list
@@ -257,13 +273,14 @@ extern DBTCL_GLOBAL __dbtcl_global;
  * This macro also assumes a label "error" to go to in the event of a Tcl
  * error.
  */
-#define	MAKE_SITE_LIST(e, h, p, s, pr) do {				\
-	myobjc = 5;							\
+#define	MAKE_SITE_LIST(e, h, p, s, pr, vw) do {				\
+	myobjc = 6;							\
 	myobjv[0] = Tcl_NewIntObj(e);					\
 	myobjv[1] = Tcl_NewStringObj((h), (int)strlen(h));		\
 	myobjv[2] = Tcl_NewIntObj((int)p);				\
 	myobjv[3] = Tcl_NewStringObj((s), (int)strlen(s));		\
 	myobjv[4] = Tcl_NewStringObj((pr), (int)strlen(pr));		\
+	myobjv[5] = Tcl_NewStringObj((vw), (int)strlen(vw));		\
 	thislist = Tcl_NewListObj(myobjc, myobjv);			\
 	result = Tcl_ListObjAppendElement(interp, res, thislist);	\
 	if (result != TCL_OK)						\
diff --git a/src/dbinc/txn.h b/src/dbinc/txn.h
index 7cbae263..682d7c42 100644
--- a/src/dbinc/txn.h
+++ b/src/dbinc/txn.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc/win_db.h b/src/dbinc/win_db.h
index ba57cd1f..e22aba98 100644
--- a/src/dbinc/win_db.h
+++ b/src/dbinc/win_db.h
@@ -1,17 +1,21 @@
 /*-
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * The following provides the information necessary to build Berkeley
  * DB on native Windows, and other Windows environments such as MinGW.
  */
 
 /*
- * Berkeley DB requires at least Windows 2000, tell Visual Studio of the
- * requirement.
+ * Berkeley DB requires at least Windows 2000, or Windows XP when building with
+ * Visual Studio 2012 or later.  Tell Visual Studio of the requirement.
  */
 #ifndef _WIN32_WINNT
+#if defined(_MSC_VER) && _MSC_VER >= 1700
+#define	_WIN32_WINNT 0x0501
+#else
 #define	_WIN32_WINNT 0x0500
 #endif
+#endif
 
 #ifndef DB_WINCE
 #include <sys/types.h>
@@ -69,12 +73,46 @@
 #endif
 #define	getpid			GetCurrentProcessId
 #define	snprintf		_snprintf
+#ifndef strcasecmp
 #define	strcasecmp		_stricmp
 #define	strncasecmp		_strnicmp
+#endif
 #define	vsnprintf		_vsnprintf
 
 #define	h_errno			WSAGetLastError()
 
+#ifdef DB_WINCE
+/* Macros used by setvbuf on WINCE */
+#ifndef _IOFBF
+#define _IOFBF			0x0000
+#endif
+#ifndef _IOLBF
+#define _IOLBF			0x0040
+#endif
+#ifndef _IONBF
+#define _IONBF			0x0004
+#endif
+/* Map C library calls to their __ce_ versions; time-related constants */
+#define freopen			__ce_freopen
+#define gmtime			__ce_gmtime
+#define mktime			__ce_mktime
+#define remove			__ce_remove
+#define SECSPERMIN		60
+#define MINSPERHOUR		60
+#define HOURSPERDAY		24
+#define DAYSPERWEEK		7
+#define DAYSPERNYEAR		365
+#define DAYSPERLYEAR		366
+#define SECSPERHOUR		(SECSPERMIN * MINSPERHOUR)
+#define SECSPERDAY		((long) SECSPERHOUR * HOURSPERDAY)
+#define MONSPERYEAR		12
+#define TM_YEAR_BASE		1900
+#define TM_YEAR_EPOCH		1970
+#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
+extern const __DB_IMPORT unsigned int mon_lengths[][MONSPERYEAR];
+extern const __DB_IMPORT unsigned int year_lengths[];
+#endif
+
 /*
  * Win32 does not have getopt.
  *
diff --git a/src/dbinc/xa.h b/src/dbinc/xa.h
index 7283c1ea..7b7e2cb0 100644
--- a/src/dbinc/xa.h
+++ b/src/dbinc/xa.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbinc_auto/api_flags.in b/src/dbinc_auto/api_flags.in
index 9727ede2..a10b6b62 100644
--- a/src/dbinc_auto/api_flags.in
+++ b/src/dbinc_auto/api_flags.in
@@ -36,6 +36,7 @@
 #define	DB_FLUSH				0x00000002
 #define	DB_FORCE				0x00000001
 #define	DB_FORCESYNC				0x00000001
+#define	DB_FORCESYNCENV				0x00000002
 #define	DB_FOREIGN_ABORT			0x00000001
 #define	DB_FOREIGN_CASCADE			0x00000002
 #define	DB_FOREIGN_NULLIFY			0x00000004
@@ -53,8 +54,9 @@
 #define	DB_INIT_REP				0x00001000
 #define	DB_INIT_TXN				0x00002000
 #define	DB_INORDER				0x00000020
-#define	DB_INTERNAL_PERSISTENT_DB		0x00001000
-#define	DB_INTERNAL_TEMPORARY_DB		0x00002000
+#define	DB_INTERNAL_BLOB_DB			0x00001000
+#define	DB_INTERNAL_PERSISTENT_DB		0x00002000
+#define	DB_INTERNAL_TEMPORARY_DB		0x00004000
 #define	DB_JOIN_NOSORT				0x00000001
 #define	DB_LEGACY				0x00000004
 #define	DB_LOCAL_SITE				0x00000008
@@ -67,12 +69,14 @@
 #define	DB_LOCK_SWITCH				0x00000020
 #define	DB_LOCK_UPGRADE				0x00000040
 #define	DB_LOG_AUTO_REMOVE			0x00000001
+#define	DB_LOG_BLOB				0x00000002
 #define	DB_LOG_CHKPNT				0x00000001
 #define	DB_LOG_COMMIT				0x00000004
-#define	DB_LOG_DIRECT				0x00000002
-#define	DB_LOG_DSYNC				0x00000004
-#define	DB_LOG_IN_MEMORY			0x00000008
+#define	DB_LOG_DIRECT				0x00000004
+#define	DB_LOG_DSYNC				0x00000008
+#define	DB_LOG_IN_MEMORY			0x00000010
 #define	DB_LOG_NOCOPY				0x00000008
+#define	DB_LOG_NOSYNC				0x00000020
 #define	DB_LOG_NOT_DURABLE			0x00000010
 #define	DB_LOG_NO_DATA				0x00000002
 #define	DB_LOG_VERIFY_CAF			0x00000001
@@ -84,7 +88,7 @@
 #define	DB_LOG_VERIFY_VERBOSE			0x00000040
 #define	DB_LOG_VERIFY_WARNING			0x00000080
 #define	DB_LOG_WRNOSYNC				0x00000020
-#define	DB_LOG_ZERO				0x00000010
+#define	DB_LOG_ZERO				0x00000040
 #define	DB_MPOOL_CREATE				0x00000001
 #define	DB_MPOOL_DIRTY				0x00000002
 #define	DB_MPOOL_DISCARD			0x00000001
@@ -102,17 +106,18 @@
 #define	DB_MUTEX_ALLOCATED			0x00000001
 #define	DB_MUTEX_LOCKED				0x00000002
 #define	DB_MUTEX_LOGICAL_LOCK			0x00000004
+#define	DB_MUTEX_OWNER_DEAD			0x00000020
 #define	DB_MUTEX_PROCESS_ONLY			0x00000008
 #define	DB_MUTEX_SELF_BLOCK			0x00000010
-#define	DB_MUTEX_SHARED				0x00000020
-#define	DB_NOERROR				0x00004000
+#define	DB_MUTEX_SHARED				0x00000040
+#define	DB_NOERROR				0x00008000
 #define	DB_NOFLUSH				0x00001000
 #define	DB_NOLOCKING				0x00002000
 #define	DB_NOMMAP				0x00000010
 #define	DB_NOORDERCHK				0x00000002
 #define	DB_NOPANIC				0x00004000
 #define	DB_NOSYNC				0x00000001
-#define	DB_NO_AUTO_COMMIT			0x00008000
+#define	DB_NO_AUTO_COMMIT			0x00010000
 #define	DB_NO_CHECKPOINT			0x00008000
 #define	DB_ODDFILESIZE				0x00000080
 #define	DB_ORDERCHKONLY				0x00000004
@@ -123,7 +128,7 @@
 #define	DB_PR_PAGE				0x00000010
 #define	DB_PR_RECOVERYTEST			0x00000020
 #define	DB_RDONLY				0x00000400
-#define	DB_RDWRMASTER				0x00010000
+#define	DB_RDWRMASTER				0x00020000
 #define	DB_READ_COMMITTED			0x00000400
 #define	DB_READ_UNCOMMITTED			0x00000200
 #define	DB_RECNUM				0x00000040
@@ -134,17 +139,20 @@
 #define	DB_RENUMBER				0x00000080
 #define	DB_REPMGR_CONF_2SITE_STRICT		0x00000001
 #define	DB_REPMGR_CONF_ELECTIONS		0x00000002
+#define	DB_REPMGR_CONF_PREFMAS_CLIENT		0x00000004
+#define	DB_REPMGR_CONF_PREFMAS_MASTER		0x00000008
 #define	DB_REPMGR_NEED_RESPONSE			0x00000001
 #define	DB_REPMGR_PEER				0x00000010
 #define	DB_REP_ANYWHERE				0x00000001
 #define	DB_REP_CLIENT				0x00000001
-#define	DB_REP_CONF_AUTOINIT			0x00000004
-#define	DB_REP_CONF_AUTOROLLBACK		0x00000008
-#define	DB_REP_CONF_BULK			0x00000010
-#define	DB_REP_CONF_DELAYCLIENT			0x00000020
-#define	DB_REP_CONF_INMEM			0x00000040
-#define	DB_REP_CONF_LEASE			0x00000080
-#define	DB_REP_CONF_NOWAIT			0x00000100
+#define	DB_REP_CONF_AUTOINIT			0x00000010
+#define	DB_REP_CONF_AUTOROLLBACK		0x00000020
+#define	DB_REP_CONF_BULK			0x00000040
+#define	DB_REP_CONF_DELAYCLIENT			0x00000080
+#define	DB_REP_CONF_ELECT_LOGLENGTH		0x00000100
+#define	DB_REP_CONF_INMEM			0x00000200
+#define	DB_REP_CONF_LEASE			0x00000400
+#define	DB_REP_CONF_NOWAIT			0x00000800
 #define	DB_REP_ELECTION				0x00000004
 #define	DB_REP_MASTER				0x00000002
 #define	DB_REP_NOBUFFER				0x00000002
@@ -161,8 +169,9 @@
 #define	DB_SEQ_WRAP				0x00000008
 #define	DB_SEQ_WRAPPED				0x00000010
 #define	DB_SET_LOCK_TIMEOUT			0x00000001
-#define	DB_SET_REG_TIMEOUT			0x00000004
-#define	DB_SET_TXN_NOW				0x00000008
+#define	DB_SET_MUTEX_FAILCHK_TIMEOUT		0x00000004
+#define	DB_SET_REG_TIMEOUT			0x00000008
+#define	DB_SET_TXN_NOW				0x00000010
 #define	DB_SET_TXN_TIMEOUT			0x00000002
 #define	DB_SHALLOW_DUP				0x00000100
 #define	DB_SNAPSHOT				0x00000200
@@ -188,7 +197,7 @@
 #define	DB_SYSTEM_MEM				0x00080000
 #define	DB_THREAD				0x00000020
 #define	DB_TIME_NOTGRANTED			0x00040000
-#define	DB_TRUNCATE				0x00020000
+#define	DB_TRUNCATE				0x00040000
 #define	DB_TXN_BULK				0x00000010
 #define	DB_TXN_FAMILY				0x00000040
 #define	DB_TXN_NOSYNC				0x00000001
@@ -206,23 +215,24 @@
 #define	DB_VERB_DEADLOCK			0x00000002
 #define	DB_VERB_FILEOPS				0x00000004
 #define	DB_VERB_FILEOPS_ALL			0x00000008
-#define	DB_VERB_RECOVERY			0x00000010
-#define	DB_VERB_REGISTER			0x00000020
-#define	DB_VERB_REPLICATION			0x00000040
-#define	DB_VERB_REPMGR_CONNFAIL			0x00000080
-#define	DB_VERB_REPMGR_MISC			0x00000100
-#define	DB_VERB_REP_ELECT			0x00000200
-#define	DB_VERB_REP_LEASE			0x00000400
-#define	DB_VERB_REP_MISC			0x00000800
-#define	DB_VERB_REP_MSGS			0x00001000
-#define	DB_VERB_REP_SYNC			0x00002000
-#define	DB_VERB_REP_SYSTEM			0x00004000
-#define	DB_VERB_REP_TEST			0x00008000
-#define	DB_VERB_WAITSFOR			0x00010000
+#define	DB_VERB_MVCC				0x00000010
+#define	DB_VERB_RECOVERY			0x00000020
+#define	DB_VERB_REGISTER			0x00000040
+#define	DB_VERB_REPLICATION			0x00000080
+#define	DB_VERB_REPMGR_CONNFAIL			0x00000100
+#define	DB_VERB_REPMGR_MISC			0x00000200
+#define	DB_VERB_REP_ELECT			0x00000400
+#define	DB_VERB_REP_LEASE			0x00000800
+#define	DB_VERB_REP_MISC			0x00001000
+#define	DB_VERB_REP_MSGS			0x00002000
+#define	DB_VERB_REP_SYNC			0x00004000
+#define	DB_VERB_REP_SYSTEM			0x00008000
+#define	DB_VERB_REP_TEST			0x00010000
+#define	DB_VERB_WAITSFOR			0x00020000
 #define	DB_VERIFY				0x00000002
 #define	DB_VERIFY_PARTITION			0x00040000
 #define	DB_WRITECURSOR				0x00000010
 #define	DB_WRITELOCK				0x00000020
-#define	DB_WRITEOPEN				0x00040000
+#define	DB_WRITEOPEN				0x00080000
 #define	DB_XA_CREATE				0x00000001
 #define	DB_YIELDCPU				0x00080000
diff --git a/src/dbinc_auto/blob_ext.h b/src/dbinc_auto/blob_ext.h
new file mode 100644
index 00000000..3eac5c8d
--- /dev/null
+++ b/src/dbinc_auto/blob_ext.h
@@ -0,0 +1,41 @@
+/* DO NOT EDIT: automatically built by dist/s_include. */
+#ifndef	_blob_ext_h_
+#define	_blob_ext_h_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+int __blob_file_create __P ((DBC *, DB_FH **, db_seq_t *));
+int  __blob_file_close __P ((DBC *, DB_FH *, u_int32_t));
+int __blob_file_delete __P((DBC *, db_seq_t));
+int __blob_file_open __P((DB *, DB_FH **, db_seq_t, u_int32_t, int));
+int __blob_file_read __P((ENV *, DB_FH *, DBT *, off_t, u_int32_t));
+int __blob_file_write __P((DBC *, DB_FH *, DBT *, off_t, db_seq_t, off_t *, u_int32_t));
+int __blob_bulk __P((DBC *, u_int32_t, db_seq_t, u_int8_t *));
+int __blob_get __P((DBC *, DBT *, db_seq_t, off_t, void **, u_int32_t *));
+int __blob_put __P(( DBC *, DBT *, db_seq_t *, off_t *size, DB_LSN *));
+int __blob_repl __P((DBC *, DBT *, db_seq_t, db_seq_t *,off_t *));
+int __blob_del __P((DBC *, db_seq_t));
+int __db_stream_init __P((DBC *, DB_STREAM **, u_int32_t));
+int __db_stream_close_int __P ((DB_STREAM *));
+int __blob_make_sub_dir __P((ENV *, char **, db_seq_t, db_seq_t));
+int __blob_make_meta_fname __P((ENV *, DB *, char **));
+int __blob_get_dir __P((DB *, char **));
+int __blob_generate_dir_ids __P((DB *, DB_TXN *, db_seq_t *));
+int __blob_generate_id __P((DB *, DB_TXN *, db_seq_t *));
+int __blob_highest_id __P((DB *, DB_TXN *, db_seq_t *));
+void __blob_calculate_dirs __P((db_seq_t, char *, int *, int *));
+int __blob_id_to_path __P((ENV *, const char *, db_seq_t, char **));
+int __blob_str_to_id __P((ENV *, const char **, db_seq_t *));
+int __blob_path_to_dir_ids __P((ENV *, const char *, db_seq_t *, db_seq_t *));
+int __blob_salvage __P((ENV *, db_seq_t, off_t, size_t, db_seq_t, db_seq_t, DBT *));
+int __blob_vrfy __P((ENV *, db_seq_t, off_t, db_seq_t, db_seq_t, db_pgno_t, u_int32_t));
+int __blob_del_hierarchy __P((ENV *));
+int __blob_del_all __P((DB *, DB_TXN *, int));
+int __blob_copy_all __P((DB*, const char *, u_int32_t));
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_blob_ext_h_ */
diff --git a/src/dbinc_auto/btree_ext.h b/src/dbinc_auto/btree_ext.h
index c90f5b80..bdd95750 100644
--- a/src/dbinc_auto/btree_ext.h
+++ b/src/dbinc_auto/btree_ext.h
@@ -8,11 +8,11 @@ extern "C" {
 
 int __bam_compact_int __P((DBC *, DBT *, DBT *, u_int32_t, int *, DB_COMPACT *, int *));
 int __bam_compact_opd __P((DBC *, db_pgno_t, PAGE **, u_int32_t, DB_COMPACT *, int *));
-int __bam_truncate_ipages __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *));
-int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t, int (*)(DB *, const DBT *, const DBT *), int *));
-int __bam_defcmp __P((DB *, const DBT *, const DBT *));
+int __bam_truncate_ipages __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *, int *));
+int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t, int (*)(DB *, const DBT *, const DBT *, size_t *), int *, size_t *));
+int __bam_defcmp __P((DB *, const DBT *, const DBT *, size_t *));
 size_t __bam_defpfx __P((DB *, const DBT *, const DBT *));
-int __bam_compress_dupcmp __P((DB *, const DBT *, const DBT *));
+int __bam_compress_dupcmp __P((DB *, const DBT *, const DBT *, size_t *));
 int __bam_defcompress __P((DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *));
 int __bam_defdecompress __P((DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *));
 int __bamc_compress_get __P((DBC *, DBT *, DBT *, u_int32_t));
@@ -52,7 +52,7 @@ int __bam_db_create __P((DB *));
 int __bam_db_close __P((DB *));
 void __bam_map_flags __P((DB *, u_int32_t *, u_int32_t *));
 int __bam_set_flags __P((DB *, u_int32_t *flagsp));
-int __bam_set_bt_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+int __bam_set_bt_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *, size_t *)));
 int __bam_set_bt_compress __P((DB *, int (*)(DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *), int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
 int __bam_get_bt_minkey __P((DB *, u_int32_t *));
 void __bam_copy_config __P((DB *, DB*, u_int32_t));
@@ -115,6 +115,8 @@ int __bam_traverse __P((DBC *, db_lockmode_t, db_pgno_t, int (*)(DBC *, PAGE *,
 int __bam_30_btreemeta __P((DB *, char *, u_int8_t *));
 int __bam_31_btreemeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __bam_31_lbtree __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+int __bam_60_btreemeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+int __bam_60_lbtree __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, db_pgno_t, u_int32_t));
 int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
 int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
diff --git a/src/dbinc_auto/common_ext.h b/src/dbinc_auto/common_ext.h
index ac16e9db..1a94d3a1 100644
--- a/src/dbinc_auto/common_ext.h
+++ b/src/dbinc_auto/common_ext.h
@@ -25,6 +25,7 @@ int __db_pgfmt __P((ENV *, db_pgno_t));
 #ifdef DIAGNOSTIC
 void __db_assert __P((ENV *, const char *, const char *, int));
 #endif
+void __env_panic_event __P((ENV *, int));
 int __env_panic_msg __P((ENV *));
 int __env_panic __P((ENV *, int));
 char *__db_unknown_error __P((int));
@@ -33,9 +34,10 @@ void __db_err __P((const ENV *, int, const char *, ...)) __attribute__ ((__forma
 void __db_errx __P((const ENV *, const char *, ...)) __attribute__ ((__format__ (__printf__, 2, 3)));
 void __db_errcall __P((const DB_ENV *, int, db_error_set_t, const char *, va_list));
 void __db_errfile __P((const DB_ENV *, int, db_error_set_t, const char *, va_list));
-void __db_msgadd __P((ENV *, DB_MSGBUF *, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4)));
-void __db_msgadd_ap __P((ENV *, DB_MSGBUF *, const char *, va_list));
+void __db_msgadd __P((const ENV *, DB_MSGBUF *, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4)));
+void __db_msgadd_ap __P((const ENV *, DB_MSGBUF *, const char *, va_list));
 void __db_msg __P((const ENV *, const char *, ...)) __attribute__ ((__format__ (__printf__, 2, 3)));
+void __db_debug_msg __P((const ENV *, const char *, ...));
 void __db_repmsg __P((const ENV *, const char *, ...)) __attribute__ ((__format__ (__printf__, 2, 3)));
 int __db_unknown_flag __P((ENV *, char *, u_int32_t));
 int __db_unknown_type __P((ENV *, char *, DBTYPE));
@@ -50,6 +52,24 @@ int __db_check_lsn __P((ENV *, DB_LSN *, DB_LSN *));
 int __db_rdonly __P((const ENV *, const char *));
 int __db_space_err __P((const DB *));
 int __db_failed __P((const ENV *, const char *, pid_t, db_threadid_t));
+int __env_failure_remember __P((const ENV *, const char *));
+#ifdef HAVE_ERROR_HISTORY
+void __db_thread_init __P((void));
+#endif
+#ifdef HAVE_ERROR_HISTORY
+int __db_diags __P((const ENV *, int));
+#endif
+#ifdef HAVE_ERROR_HISTORY
+DB_MSGBUF *__db_deferred_get __P((void));
+#endif
+#ifdef HAVE_ERROR_HISTORY
+void __db_deferred_discard __P((void));
+#endif
+#ifdef HAVE_ERROR_HISTORY
+int __db_remember_context __P((const ENV *, DB_MSGBUF *, int));
+#endif
+char * __db_ctimespec __P((const db_timespec *, char *));
+char *__db_fmt_quote __P((char *, size_t, const char *));
 int __db_getlong __P((DB_ENV *, const char *, char *, long, long, long *));
 int __db_getulong __P((DB_ENV *, const char *, char *, u_long, u_long, u_long *));
 void __db_idspace __P((u_int32_t *, int, u_int32_t *, u_int32_t *));
diff --git a/src/dbinc_auto/db_ext.h b/src/dbinc_auto/db_ext.h
index de2a6ce4..719fc0c5 100644
--- a/src/dbinc_auto/db_ext.h
+++ b/src/dbinc_auto/db_ext.h
@@ -62,14 +62,19 @@ int __db_merge_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __db_pgno_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __db_init_print __P((ENV *, DB_DISTAB *));
 int __db_dbbackup_pp __P((DB_ENV *, const char *, const char *, u_int32_t));
-int __db_dbbackup __P((DB_ENV *, DB_THREAD_INFO *, const char *, const char *, u_int32_t));
-int __db_backup __P((DB_ENV *, const char *, u_int32_t));
+int __db_dbbackup __P((DB_ENV *, DB_THREAD_INFO *, const char *, const char *, u_int32_t, u_int32_t, const char *));
+int backup_data_copy __P(( DB_ENV *, const char *, const char *, const char *, int));
+int __db_backup_pp __P((DB_ENV *, const char *, u_int32_t));
 int __dbc_close __P((DBC *));
 int __dbc_destroy __P((DBC *));
 int __dbc_cmp __P((DBC *, DBC *, int *));
 int __dbc_count __P((DBC *, db_recno_t *));
 int __dbc_del __P((DBC *, u_int32_t));
 int __dbc_idel __P((DBC *, u_int32_t));
+int __dbc_db_stream __P((DBC *, DB_STREAM **, u_int32_t));
+int __dbc_get_blob_id __P((DBC *, db_seq_t *));
+int __dbc_get_blob_size __P((DBC *, off_t *));
+int __dbc_set_blob_size __P((DBC *, off_t));
 #ifdef HAVE_COMPRESSION
 int __dbc_bulk_del __P((DBC *, DBT *, u_int32_t));
 #endif
@@ -93,15 +98,16 @@ u_int32_t __db_partsize __P((u_int32_t, DBT *));
 #ifdef DIAGNOSTIC
 void __db_check_skeyset __P((DB *, DBT *));
 #endif
+int __dbc_diags __P((DBC *, int));
 int __cdsgroup_begin __P((ENV *, DB_TXN **));
 int __cdsgroup_begin_pp __P((DB_ENV *, DB_TXN **));
 int __db_compact_int __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
-int __db_exchange_page __P((DBC *, PAGE **, PAGE *, db_pgno_t, int));
-int __db_truncate_overflow __P((DBC *, db_pgno_t, PAGE **, DB_COMPACT *));
-int __db_truncate_root __P((DBC *, PAGE *, u_int32_t, db_pgno_t *, u_int32_t));
+int __db_exchange_page __P((DBC *, PAGE **, PAGE *, db_pgno_t, int, int *));
+int __db_truncate_overflow __P((DBC *, db_pgno_t, PAGE **, DB_COMPACT *, int *));
+int __db_truncate_root __P((DBC *, PAGE *, u_int32_t, db_pgno_t *, u_int32_t, int *));
 int __db_find_free __P((DBC *, u_int32_t, u_int32_t, db_pgno_t, db_pgno_t *));
 int __db_relink __P((DBC *, PAGE *, PAGE *, db_pgno_t));
-int __db_move_metadata __P((DBC *, DBMETA **, DB_COMPACT *));
+int __db_move_metadata __P((DBC *, DBMETA **, DB_COMPACT *, int *));
 int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *));
 int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *));
 int __db_decrypt_pg __P((ENV *, DB *, PAGE *));
@@ -185,6 +191,10 @@ int __db_has_pagelock __P((ENV *, DB_LOCKER *, DB_MPOOLFILE *, PAGE *, db_lockmo
 int __db_lput __P((DBC *, DB_LOCK *));
 int __db_create_internal  __P((DB **, ENV *, u_int32_t));
 int __dbh_am_chk __P((DB *, u_int32_t));
+int __db_get_blob_threshold __P((DB *, u_int32_t *));
+int __db_set_blob_threshold __P((DB *, u_int32_t, u_int32_t));
+int __db_blobs_enabled __P((DB *));
+int __db_set_dup_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *, size_t *)));
 int __db_get_flags __P((DB *, u_int32_t *));
 int  __db_set_flags __P((DB *, u_int32_t));
 int  __db_get_lorder __P((DB *, int *));
@@ -197,12 +207,13 @@ int __db_init_subdb __P((DB *, DB *, const char *, DB_THREAD_INFO *, DB_TXN *));
 int __db_chk_meta __P((ENV *, DB *, DBMETA *, u_int32_t));
 int __db_meta_setup __P((ENV *, DB *, const char *, DBMETA *, u_int32_t, u_int32_t));
 int __db_reopen __P((DBC *));
+int __db_alloc_dbt __P((ENV *, DBT *, u_int32_t, u_int32_t *, u_int32_t *, void **, u_int32_t *));
 int __db_goff __P((DBC *, DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *));
 int __db_poff __P((DBC *, const DBT *, db_pgno_t *));
 int __db_ovref __P((DBC *, db_pgno_t));
 int __db_doff __P((DBC *, db_pgno_t));
-int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t, int (*)(DB *, const DBT *, const DBT *), int *));
-int __db_coff __P((DBC *, const DBT *, const DBT *, int (*)(DB *, const DBT *, const DBT *), int *));
+int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t, int (*)(DB *, const DBT *, const DBT *, size_t *), int *, size_t *));
+int __db_coff __P((DBC *, const DBT *, const DBT *, int (*)(DB *, const DBT *, const DBT *, size_t *), int *));
 int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
 int __db_vrfy_ovfl_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t));
 int __db_safe_goff __P((DB *, VRFY_DBINFO *, db_pgno_t, DBT *, void *, u_int32_t *, u_int32_t));
@@ -221,11 +232,12 @@ int __db_name_to_val __P((FN const *, char *));
 const char *__db_pagetype_to_string __P((u_int32_t));
 int __db_dump_pp __P((DB *, const char *, int (*)(void *, const void *), void *, int, int));
 int __db_dump __P((DB *, const char *, int (*)(void *, const void *), void *, int, int));
-int __db_prdbt __P((DBT *, int, const char *, void *, int (*)(void *, const void *), int, int));
+int __db_prdbt __P((DBT *, int, const char *, void *, int (*)(void *, const void *), int, int, int));
 int	__db_prheader __P((DB *, const char *, int, int, void *, int (*)(void *, const void *), VRFY_DBINFO *, db_pgno_t));
 int __db_prfooter __P((void *, int (*)(void *, const void *)));
 int  __db_pr_callback __P((void *, const void *));
 const char * __db_dbtype_to_string __P((DBTYPE));
+char *__db_tohex __P((const void *, size_t, char *));
 int __db_addrem_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __db_addrem_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __db_big_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
@@ -263,6 +275,8 @@ int __db_rename_pp __P((DB *, const char *, const char *, const char *, u_int32_
 int __db_rename_int __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, const char *, u_int32_t));
 int __db_ret __P((DBC *, PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
 int __db_retcopy __P((ENV *, DBT *, void *, u_int32_t, void **, u_int32_t *));
+int __db_dbt_clone __P((ENV *, DBT *, const DBT *));
+int __db_dbt_clone_free __P((ENV *, DBT *));
 int __env_fileid_reset_pp __P((DB_ENV *, const char *, u_int32_t));
 int __env_fileid_reset __P((ENV *, DB_THREAD_INFO *, const char *, int));
 int __env_lsn_reset_pp __P((DB_ENV *, const char *, u_int32_t));
@@ -329,6 +343,7 @@ int __part_fileid_reset __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, i
 int __part_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t));
 int __part_remove __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t));
 int __part_rename __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, const char *));
+int __partc_dup __P((DBC *, DBC *));
 int __part_verify __P((DB *, VRFY_DBINFO *, const char *, void *, int (*)(void *, const void *), u_int32_t));
 int __part_testdocopy __P((DB *, const char *));
 int __db_no_partition __P((ENV *));
diff --git a/src/dbinc_auto/dbreg_auto.h b/src/dbinc_auto/dbreg_auto.h
index 63ad0cd3..22f1e84c 100644
--- a/src/dbinc_auto/dbreg_auto.h
+++ b/src/dbinc_auto/dbreg_auto.h
@@ -3,6 +3,28 @@
 #ifndef	__dbreg_AUTO_H
 #define	__dbreg_AUTO_H
 #include "dbinc/log.h"
+#define	DB___dbreg_register_42	2	/* Same record type number as DB___dbreg_register. */
+typedef struct ___dbreg_register_42_args {	/* Argument struct read by __dbreg_register_42_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	u_int32_t	opcode;
+	DBT	name;
+	DBT	uid;
+	int32_t	fileid;
+	DBTYPE	ftype;
+	db_pgno_t	meta_pgno;
+	u_int32_t	id;	/* Last field: no blob_fid_lo/blob_fid_hi, unlike __dbreg_register_args. */
+} __dbreg_register_42_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __dbreg_register_42_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __dbreg_register_42_read(ENV *env, 
+    void *data, __dbreg_register_42_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __dbreg_register_42_desc, sizeof(__dbreg_register_42_args), (void**)arg));
+}
 #define	DB___dbreg_register	2
 typedef struct ___dbreg_register_args {
 	u_int32_t type;
@@ -15,22 +37,25 @@ typedef struct ___dbreg_register_args {
 	DBTYPE	ftype;
 	db_pgno_t	meta_pgno;
 	u_int32_t	id;
+	u_int32_t	blob_fid_lo;
+	u_int32_t	blob_fid_hi;
 } __dbreg_register_args;
 
 extern __DB_IMPORT DB_LOG_RECSPEC __dbreg_register_desc[];
 static inline int
 __dbreg_register_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags,
     u_int32_t opcode, const DBT *name, const DBT *uid, int32_t fileid, DBTYPE ftype,
-    db_pgno_t meta_pgno, u_int32_t id)
+    db_pgno_t meta_pgno, u_int32_t id, u_int32_t blob_fid_lo, u_int32_t blob_fid_hi)	/* New trailing arguments: two 32-bit blob_fid words (presumably low/high halves -- confirm). */
 {
 	return (__log_put_record(env, NULL, txnp, ret_lsnp,
 	    flags, DB___dbreg_register, 0,
 	    sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) +
 	    sizeof(u_int32_t) + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(uid) +
 	    sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) +
-	    sizeof(u_int32_t),
+	    sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t),	/* Size argument grows by two u_int32_t for blob_fid_lo/blob_fid_hi. */
 	    __dbreg_register_desc,
-	    opcode, name, uid, fileid, ftype, meta_pgno, id));
+	    opcode, name, uid, fileid, ftype, meta_pgno, id, blob_fid_lo,	/* blob_fid_* follow id, matching the field order of __dbreg_register_args. */
+	    blob_fid_hi));
 }
 
 static inline int __dbreg_register_read(ENV *env, 
diff --git a/src/dbinc_auto/dbreg_ext.h b/src/dbinc_auto/dbreg_ext.h
index 0f495c33..421c7989 100644
--- a/src/dbinc_auto/dbreg_ext.h
+++ b/src/dbinc_auto/dbreg_ext.h
@@ -20,9 +20,11 @@ int __dbreg_failchk __P((ENV *));
 int __dbreg_log_close __P((ENV *, FNAME *, DB_TXN *, u_int32_t));
 int __dbreg_log_id __P((DB *, DB_TXN *, int32_t, int));
 int __dbreg_init_recover __P((ENV *, DB_DISTAB *));
+int __dbreg_register_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __dbreg_register_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __dbreg_init_print __P((ENV *, DB_DISTAB *));
 int __dbreg_register_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __dbreg_register_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __dbreg_stat_print __P((ENV *, u_int32_t));
 void __dbreg_print_fname __P((ENV *, FNAME *));
 int __dbreg_add_dbentry __P((ENV *, DB_LOG *, DB *, int32_t));
@@ -36,8 +38,9 @@ int __dbreg_invalidate_files __P((ENV *, int));
 int __dbreg_id_to_db __P((ENV *, DB_TXN *, DB **, int32_t, int));
 int __dbreg_id_to_fname __P((DB_LOG *, int32_t, int, FNAME **));
 int __dbreg_fid_to_fname __P((DB_LOG *, u_int8_t *, int, FNAME **));
+int __dbreg_blob_file_to_fname __P((DB_LOG *, db_seq_t, int, FNAME **));
 int __dbreg_get_name __P((ENV *, u_int8_t *, char **, char **));
-int __dbreg_do_open __P((ENV *, DB_TXN *, DB_LOG *, u_int8_t *, char *, DBTYPE, int32_t, db_pgno_t, void *, u_int32_t, u_int32_t));
+int __dbreg_do_open __P((ENV *, DB_TXN *, DB_LOG *, u_int8_t *, char *, DBTYPE, int32_t, db_pgno_t, void *, u_int32_t, u_int32_t, db_seq_t));
 int __dbreg_lazy_id __P((DB *));
 
 #if defined(__cplusplus)
diff --git a/src/dbinc_auto/env_ext.h b/src/dbinc_auto/env_ext.h
index 55dbcba4..7df61ea9 100644
--- a/src/dbinc_auto/env_ext.h
+++ b/src/dbinc_auto/env_ext.h
@@ -36,9 +36,13 @@ void __db_env_destroy __P((DB_ENV *));
 int  __env_get_alloc __P((DB_ENV *, void *(**)(size_t), void *(**)(void *, size_t), void (**)(void *)));
 int  __env_set_alloc __P((DB_ENV *, void *(*)(size_t), void *(*)(void *, size_t), void (*)(void *)));
 int  __env_get_memory_init __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t *));
+int  __env_get_blob_threshold_pp __P ((DB_ENV *, u_int32_t *));
+int  __env_get_blob_threshold_int __P ((ENV *, u_int32_t *));
+int  __env_set_blob_threshold __P((DB_ENV *, u_int32_t, u_int32_t));
 int  __env_set_memory_init __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t));
 int  __env_get_memory_max __P((DB_ENV *, u_int32_t *, u_int32_t *));
 int  __env_set_memory_max __P((DB_ENV *, u_int32_t, u_int32_t));
+int  __env_set_blob_dir __P((DB_ENV *, const char *));
 int __env_get_encrypt_flags __P((DB_ENV *, u_int32_t *));
 int __env_set_encrypt __P((DB_ENV *, const char *, u_int32_t));
 void __env_map_flags __P((const FLAG_MAP *, u_int, u_int32_t *, u_int32_t *));
@@ -91,6 +95,7 @@ void __env_panic_set __P((ENV *, int));
 int __env_ref_increment __P((ENV *));
 int __env_ref_decrement __P((ENV *));
 int __env_ref_get __P((DB_ENV *, u_int32_t *));
+int __env_region_cleanup __P((ENV *));
 int __env_detach __P((ENV *, int));
 int __env_remove_env __P((ENV *));
 int __env_region_attach __P((ENV *, REGINFO *, size_t, size_t));
@@ -102,6 +107,7 @@ int __envreg_xunlock __P((ENV *));
 int __envreg_isalive __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
 u_int32_t __env_struct_sig __P((void));
 int __env_stat_print_pp __P((DB_ENV *, u_int32_t));
+int __env_print_thread __P((ENV *));
 void __db_print_fh __P((ENV *, const char *, DB_FH *, u_int32_t));
 void __db_print_fileid __P((ENV *, u_int8_t *, const char *));
 void __db_dl __P((ENV *, const char *, u_long));
@@ -119,6 +125,18 @@ int __repmgr_get_ack_policy __P((DB_ENV *, int *));
 int __repmgr_set_ack_policy __P((DB_ENV *, int));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
+int __repmgr_get_incoming_queue_max __P((DB_ENV *, u_int32_t *, u_int32_t *));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+int __repmgr_set_incoming_queue_max __P((DB_ENV *, u_int32_t, u_int32_t));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+int __repmgr_get_incoming_queue_redzone __P((DB_ENV *, u_int32_t *, u_int32_t *));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+int __repmgr_get_incoming_queue_fullevent __P((DB_ENV *, int *));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
 int __repmgr_site __P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
@@ -128,10 +146,10 @@ int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **));
 int __repmgr_local_site __P((DB_ENV *, DB_SITE **));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+int __repmgr_site_list_pp __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-int __repmgr_start __P((DB_ENV *, int, u_int32_t));
+int __repmgr_start_pp __P((DB_ENV *, int, u_int32_t));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
 int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t));
diff --git a/src/dbinc_auto/fileops_auto.h b/src/dbinc_auto/fileops_auto.h
index 59385c88..3894c23d 100644
--- a/src/dbinc_auto/fileops_auto.h
+++ b/src/dbinc_auto/fileops_auto.h
@@ -21,6 +21,25 @@ static inline int __fop_create_42_read(ENV *env,
 	return (__log_read_record(env, 
 	    NULL, NULL, data, __fop_create_42_desc, sizeof(__fop_create_42_args), (void**)arg));
 }
+#define	DB___fop_create_60	143	/* Same record type number as DB___fop_create. */
+typedef struct ___fop_create_60_args {	/* Argument struct read by __fop_create_60_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	name;
+	DBT	dirname;
+	u_int32_t	appname;
+	u_int32_t	mode;
+} __fop_create_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_create_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __fop_create_60_read(ENV *env, 
+    void *data, __fop_create_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_create_60_desc, sizeof(__fop_create_60_args), (void**)arg));
+}
 #define	DB___fop_create	143
 typedef struct ___fop_create_args {
 	u_int32_t type;
@@ -53,6 +72,24 @@ static inline int __fop_create_read(ENV *env,
 	return (__log_read_record(env, 
 	    NULL, NULL, data, __fop_create_desc, sizeof(__fop_create_args), (void**)arg));
 }
+#define	DB___fop_remove_60	144	/* Same record type number as DB___fop_remove. */
+typedef struct ___fop_remove_60_args {	/* Argument struct read by __fop_remove_60_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	name;
+	DBT	fid;
+	u_int32_t	appname;
+} __fop_remove_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_remove_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __fop_remove_60_read(ENV *env, 
+    void *data, __fop_remove_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_remove_60_desc, sizeof(__fop_remove_60_args), (void**)arg));
+}
 #define	DB___fop_remove	144
 typedef struct ___fop_remove_args {
 	u_int32_t type;
@@ -105,6 +142,29 @@ static inline int __fop_write_42_read(ENV *env,
 	return (__log_read_record(env, 
 	    NULL, NULL, data, __fop_write_42_desc, sizeof(__fop_write_42_args), (void**)arg));
 }
+#define	DB___fop_write_60	145	/* Same record type number as DB___fop_write. */
+typedef struct ___fop_write_60_args {	/* Argument struct read by __fop_write_60_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	name;
+	DBT	dirname;
+	u_int32_t	appname;
+	u_int32_t	pgsize;
+	db_pgno_t	pageno;
+	u_int32_t	offset;
+	DBT	page;
+	u_int32_t	flag;
+} __fop_write_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_write_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __fop_write_60_read(ENV *env, 
+    void *data, __fop_write_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_write_60_desc, sizeof(__fop_write_60_args), (void**)arg));
+}
 #define	DB___fop_write	145
 typedef struct ___fop_write_args {
 	u_int32_t type;
@@ -143,6 +203,66 @@ static inline int __fop_write_read(ENV *env,
 	return (__log_read_record(env, 
 	    NULL, NULL, data, __fop_write_desc, sizeof(__fop_write_args), (void**)arg));
 }
+#define	DB___fop_write_file_60	86	/* Same record type number as DB___fop_write_file. */
+typedef struct ___fop_write_file_60_args {	/* Argument struct read by __fop_write_file_60_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	name;
+	DBT	dirname;
+	u_int32_t	appname;
+	u_int32_t	offset_lo;	/* Offset is two 32-bit fields here; __fop_write_file_args has one u_int64_t offset. */
+	u_int32_t	offset_hi;	/* Presumably the high half of that offset -- confirm. */
+	DBT	old_data;
+	DBT	new_data;
+	u_int32_t	flag;
+} __fop_write_file_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_write_file_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __fop_write_file_60_read(ENV *env, 
+    void *data, __fop_write_file_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_write_file_60_desc, sizeof(__fop_write_file_60_args), (void**)arg));
+}
+#define	DB___fop_write_file	86	/* Same record type number as DB___fop_write_file_60. */
+typedef struct ___fop_write_file_args {	/* Argument struct for __fop_write_file_log()/__fop_write_file_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	name;
+	DBT	dirname;
+	u_int32_t	appname;
+	u_int64_t	offset;	/* Single 64-bit field; the _60 struct uses offset_lo/offset_hi instead. */
+	DBT	old_data;
+	DBT	new_data;
+	u_int32_t	flag;
+} __fop_write_file_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_write_file_desc[];	/* Record spec shared by the _log and _read helpers. */
+static inline int
+__fop_write_file_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags,	/* Forwards its arguments to __log_put_record as a DB___fop_write_file record. */
+    const DBT *name, const DBT *dirname, u_int32_t appname, u_int64_t offset, const DBT *old_data,
+    const DBT *new_data, u_int32_t flag)
+{
+	return (__log_put_record(env, NULL, txnp, ret_lsnp,	/* Second argument is NULL here; __heap_addrem_log passes its dbp in that slot. */
+	    flags, DB___fop_write_file, 0,
+	    sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) +	/* Presumably the common record header (type, txn id, prev_lsn) -- confirm. */
+	    LOG_DBT_SIZE(name) + LOG_DBT_SIZE(dirname) + sizeof(u_int32_t) +	/* name, dirname, appname. */
+	    sizeof(u_int64_t) + LOG_DBT_SIZE(old_data) + LOG_DBT_SIZE(new_data) +	/* offset, old_data, new_data. */
+	    sizeof(u_int32_t),	/* flag. */
+	    __fop_write_file_desc,
+	    name, dirname, appname, offset, old_data, new_data, flag));	/* Same order as the fields of __fop_write_file_args. */
+}
+
+static inline int __fop_write_file_read(ENV *env, 
+    void *data, __fop_write_file_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_write_file_desc, sizeof(__fop_write_file_args), (void**)arg));
+}
 #define	DB___fop_rename_42	146
 #define	DB___fop_rename_noundo_46	150
 typedef struct ___fop_rename_42_args {
@@ -171,6 +291,35 @@ static inline int __fop_rename_noundo_46_read(ENV *env,
 	return (__log_read_record(env, 
 	    NULL, NULL, data, __fop_rename_noundo_46_desc, sizeof(__fop_rename_42_args), (void**)arg));
 }
+#define	DB___fop_rename_60	146	/* Same record type number as DB___fop_rename. */
+#define	DB___fop_rename_noundo_60	150	/* Same record type number as DB___fop_rename_noundo. */
+typedef struct ___fop_rename_60_args {	/* One struct serves both the rename_60 and rename_noundo_60 readers. */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	oldname;
+	DBT	newname;
+	DBT	dirname;
+	DBT	fileid;
+	u_int32_t	appname;
+} __fop_rename_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_rename_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __fop_rename_60_read(ENV *env, 
+    void *data, __fop_rename_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_rename_60_desc, sizeof(__fop_rename_60_args), (void**)arg));
+}
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_rename_noundo_60_desc[];	/* Separate record spec; the argument struct is shared with rename_60. */
+static inline int __fop_rename_noundo_60_read(ENV *env, 
+    void *data, __fop_rename_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_rename_noundo_60_desc, sizeof(__fop_rename_60_args), (void**)arg));
+}
 #define	DB___fop_rename	146
 #define	DB___fop_rename_noundo	150
 typedef struct ___fop_rename_args {
@@ -226,6 +375,26 @@ static inline int __fop_rename_noundo_read(ENV *env,
 	return (__log_read_record(env, 
 	    NULL, NULL, data, __fop_rename_noundo_desc, sizeof(__fop_rename_args), (void**)arg));
 }
+#define	DB___fop_file_remove_60	141	/* Same record type number as DB___fop_file_remove. */
+typedef struct ___fop_file_remove_60_args {	/* Argument struct read by __fop_file_remove_60_read(). */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	DBT	real_fid;
+	DBT	tmp_fid;
+	DBT	name;
+	u_int32_t	appname;
+	u_int32_t	child;
+} __fop_file_remove_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __fop_file_remove_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __fop_file_remove_60_read(ENV *env, 
+    void *data, __fop_file_remove_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    NULL, NULL, data, __fop_file_remove_60_desc, sizeof(__fop_file_remove_60_args), (void**)arg));
+}
 #define	DB___fop_file_remove	141
 typedef struct ___fop_file_remove_args {
 	u_int32_t type;
diff --git a/src/dbinc_auto/fileops_ext.h b/src/dbinc_auto/fileops_ext.h
index 0aa6c1e1..89306183 100644
--- a/src/dbinc_auto/fileops_ext.h
+++ b/src/dbinc_auto/fileops_ext.h
@@ -8,35 +8,51 @@ extern "C" {
 
 int __fop_init_recover __P((ENV *, DB_DISTAB *));
 int __fop_create_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_create_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_create_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_remove_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_remove_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_write_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_write_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_file_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_file_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_rename_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_file_remove_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_file_remove_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_init_print __P((ENV *, DB_DISTAB *));
 int __fop_create __P((ENV *, DB_TXN *, DB_FH **, const char *, const char **, APPNAME, int, u_int32_t));
 int __fop_remove __P((ENV *, DB_TXN *, u_int8_t *, const char *, const char **, APPNAME, u_int32_t));
 int __fop_write __P((ENV *, DB_TXN *, const char *, const char *, APPNAME, DB_FH *, u_int32_t, db_pgno_t, u_int32_t, void *, u_int32_t, u_int32_t, u_int32_t));
+int __fop_write_file __P((ENV *, DB_TXN *, const char *, const char *, APPNAME, DB_FH *, off_t, void *, size_t, u_int32_t));
 int __fop_rename __P((ENV *, DB_TXN *, const char *, const char *, const char **, u_int8_t *, APPNAME, int, u_int32_t));
 int __fop_create_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_create_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_create_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_remove_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_remove_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_write_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_write_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_file_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_file_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_noundo_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_rename_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_rename_noundo_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_noundo_46_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_file_remove_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_file_remove_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_lock_handle __P((ENV *, DB *, DB_LOCKER *, db_lockmode_t, DB_LOCK *, u_int32_t));
 int __fop_file_setup __P((DB *, DB_THREAD_INFO *ip, DB_TXN *, const char *, int, u_int32_t, u_int32_t *));
 int __fop_subdb_setup __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, int, u_int32_t));
 int __fop_remove_setup __P((DB *, DB_TXN *, const char *, u_int32_t));
 int __fop_read_meta __P((ENV *, const char *, u_int8_t *, size_t, DB_FH *, int, size_t *));
-int __fop_dummy __P((DB *, DB_TXN *, const char *, const char *));
-int __fop_dbrename __P((DB *, const char *, const char *));
+int __fop_dummy __P((DB *, DB_TXN *, const char *, const char *, APPNAME));
+int __fop_dbrename __P((DB *, const char *, const char *, APPNAME));
 
 #if defined(__cplusplus)
 }
diff --git a/src/dbinc_auto/hash_ext.h b/src/dbinc_auto/hash_ext.h
index e83fe817..4d7c2e9c 100644
--- a/src/dbinc_auto/hash_ext.h
+++ b/src/dbinc_auto/hash_ext.h
@@ -57,7 +57,7 @@ int __ham_return_meta __P((DBC *, u_int32_t, DBMETA **));
 int __ham_db_create __P((DB *));
 int __ham_db_close __P((DB *));
 int __ham_get_h_ffactor __P((DB *, u_int32_t *));
-int __ham_set_h_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+int __ham_set_h_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *, size_t *)));
 int __ham_get_h_nelem __P((DB *, u_int32_t *));
 void __ham_copy_config __P((DB *, DB*, u_int32_t));
 int __ham_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char * name, db_pgno_t, u_int32_t));
@@ -116,6 +116,8 @@ int __ham_31_hashmeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __ham_31_hash __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __ham_46_hashmeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __ham_46_hash __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+int __ham_60_hashmeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+int __ham_60_hash __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, db_pgno_t, u_int32_t));
 int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
 int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
diff --git a/src/dbinc_auto/heap_auto.h b/src/dbinc_auto/heap_auto.h
index bf288627..f91cacfe 100644
--- a/src/dbinc_auto/heap_auto.h
+++ b/src/dbinc_auto/heap_auto.h
@@ -26,7 +26,7 @@ __heap_addrem_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags,
     const DBT *hdr, const DBT *dbt, DB_LSN * pagelsn)
 {
 	return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp,
-	    flags, DB___heap_addrem, 0,
+	    flags, DB___heap_addrem, 1,
 	    sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) +
 	    sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) +
 	    sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(hdr) +
@@ -42,6 +42,52 @@ static inline int __heap_addrem_read(ENV *env,
 	return (__log_read_record(env, 
 	    dbpp, td, data, __heap_addrem_desc, sizeof(__heap_addrem_args), (void**)arg));
 }
+#define	DB___heap_addrem_60	151	/* Same record type number as DB___heap_addrem_50. */
+typedef struct ___heap_addrem_60_args {	/* Field-for-field identical to __heap_addrem_50_args as declared in this header. */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	u_int32_t	opcode;
+	int32_t	fileid;
+	db_pgno_t	pgno;
+	u_int32_t	indx;
+	u_int32_t	nbytes;
+	DBT	hdr;
+	DBT	dbt;
+	DB_LSN	pagelsn;
+} __heap_addrem_60_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __heap_addrem_60_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __heap_addrem_60_read(ENV *env, 
+    DB **dbpp, void *td, void *data, __heap_addrem_60_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    dbpp, td, data, __heap_addrem_60_desc, sizeof(__heap_addrem_60_args), (void**)arg));	/* dbpp and td are forwarded; the __fop_*_read helpers pass NULL, NULL here. */
+}
+#define	DB___heap_addrem_50	151	/* Same record type number as DB___heap_addrem_60. */
+typedef struct ___heap_addrem_50_args {	/* Field-for-field identical to __heap_addrem_60_args as declared in this header. */
+	u_int32_t type;
+	DB_TXN *txnp;
+	DB_LSN prev_lsn;
+	u_int32_t	opcode;
+	int32_t	fileid;
+	db_pgno_t	pgno;
+	u_int32_t	indx;
+	u_int32_t	nbytes;
+	DBT	hdr;
+	DBT	dbt;
+	DB_LSN	pagelsn;
+} __heap_addrem_50_args;
+
+extern __DB_IMPORT DB_LOG_RECSPEC __heap_addrem_50_desc[];	/* Record spec passed to __log_read_record below. */
+static inline int __heap_addrem_50_read(ENV *env, 
+    DB **dbpp, void *td, void *data, __heap_addrem_50_args **arg)
+{
+	*arg = NULL;	/* Clear the out-pointer before delegating. */
+	return (__log_read_record(env, 	/* Return value is passed through unchanged; *arg is presumably set by the callee -- confirm. */
+	    dbpp, td, data, __heap_addrem_50_desc, sizeof(__heap_addrem_50_args), (void**)arg));	/* dbpp and td are forwarded; the __fop_*_read helpers pass NULL, NULL here. */
+}
 #define	DB___heap_pg_alloc	152
 typedef struct ___heap_pg_alloc_args {
 	u_int32_t type;
diff --git a/src/dbinc_auto/heap_ext.h b/src/dbinc_auto/heap_ext.h
index 8bc24b61..e886d6c9 100644
--- a/src/dbinc_auto/heap_ext.h
+++ b/src/dbinc_auto/heap_ext.h
@@ -15,6 +15,8 @@ int __heapc_gsplit __P((DBC *, DBT *, void **, u_int32_t *));
 int __heapc_refresh __P((DBC *));
 int __heap_init_recover __P((ENV *, DB_DISTAB *));
 int __heap_addrem_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __heap_addrem_60_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __heap_addrem_50_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_trunc_meta_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_trunc_page_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
@@ -39,6 +41,8 @@ int __heap_addrem_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_pg_alloc_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_trunc_meta_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_trunc_page_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __heap_addrem_60_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __heap_addrem_50_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_truncate __P((DBC *, u_int32_t *));
 int __heap_stat __P((DBC *, void *, u_int32_t));
 int __heap_stat_print __P((DBC *, u_int32_t));
@@ -46,6 +50,8 @@ void __heap_print_cursor __P((DBC *));
 int __heap_stat_callback __P((DBC *, PAGE *, void *, int *));
 int __heap_traverse __P((DBC *, int (*)(DBC *, PAGE *, void *, int *), void *));
 int __db_no_heap_am __P((ENV *));
+int __heap_60_heapmeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+int __heap_60_heap __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
 int __heap_vrfy_meta __P((DB *, VRFY_DBINFO *, HEAPMETA *, db_pgno_t, u_int32_t));
 int __heap_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
 int __heap_vrfy_structure __P((DB *, VRFY_DBINFO *, u_int32_t));
diff --git a/src/dbinc_auto/int_def.in b/src/dbinc_auto/int_def.in
index dce2831c..5042dfd0 100644
--- a/src/dbinc_auto/int_def.in
+++ b/src/dbinc_auto/int_def.in
@@ -85,13 +85,18 @@
 #define	__db_init_print __db_init_print@DB_VERSION_UNIQUE_NAME@
 #define	__db_dbbackup_pp __db_dbbackup_pp@DB_VERSION_UNIQUE_NAME@
 #define	__db_dbbackup __db_dbbackup@DB_VERSION_UNIQUE_NAME@
-#define	__db_backup __db_backup@DB_VERSION_UNIQUE_NAME@
+#define	backup_data_copy backup_data_copy@DB_VERSION_UNIQUE_NAME@
+#define	__db_backup_pp __db_backup_pp@DB_VERSION_UNIQUE_NAME@
 #define	__dbc_close __dbc_close@DB_VERSION_UNIQUE_NAME@
 #define	__dbc_destroy __dbc_destroy@DB_VERSION_UNIQUE_NAME@
 #define	__dbc_cmp __dbc_cmp@DB_VERSION_UNIQUE_NAME@
 #define	__dbc_count __dbc_count@DB_VERSION_UNIQUE_NAME@
 #define	__dbc_del __dbc_del@DB_VERSION_UNIQUE_NAME@
 #define	__dbc_idel __dbc_idel@DB_VERSION_UNIQUE_NAME@
+#define	__dbc_db_stream __dbc_db_stream@DB_VERSION_UNIQUE_NAME@
+#define	__dbc_get_blob_id __dbc_get_blob_id@DB_VERSION_UNIQUE_NAME@
+#define	__dbc_get_blob_size __dbc_get_blob_size@DB_VERSION_UNIQUE_NAME@
+#define	__dbc_set_blob_size __dbc_set_blob_size@DB_VERSION_UNIQUE_NAME@
 #ifdef HAVE_COMPRESSION
 #define	__dbc_bulk_del __dbc_bulk_del@DB_VERSION_UNIQUE_NAME@
 #endif
@@ -115,6 +120,7 @@
 #ifdef DIAGNOSTIC
 #define	__db_check_skeyset __db_check_skeyset@DB_VERSION_UNIQUE_NAME@
 #endif
+#define	__dbc_diags __dbc_diags@DB_VERSION_UNIQUE_NAME@
 #define	__cdsgroup_begin __cdsgroup_begin@DB_VERSION_UNIQUE_NAME@
 #define	__cdsgroup_begin_pp __cdsgroup_begin_pp@DB_VERSION_UNIQUE_NAME@
 #define	__db_compact_int __db_compact_int@DB_VERSION_UNIQUE_NAME@
@@ -207,6 +213,10 @@
 #define	__db_lput __db_lput@DB_VERSION_UNIQUE_NAME@
 #define	__db_create_internal __db_create_internal@DB_VERSION_UNIQUE_NAME@
 #define	__dbh_am_chk __dbh_am_chk@DB_VERSION_UNIQUE_NAME@
+#define	__db_get_blob_threshold __db_get_blob_threshold@DB_VERSION_UNIQUE_NAME@
+#define	__db_set_blob_threshold __db_set_blob_threshold@DB_VERSION_UNIQUE_NAME@
+#define	__db_blobs_enabled __db_blobs_enabled@DB_VERSION_UNIQUE_NAME@
+#define	__db_set_dup_compare __db_set_dup_compare@DB_VERSION_UNIQUE_NAME@
 #define	__db_get_flags __db_get_flags@DB_VERSION_UNIQUE_NAME@
 #define	__db_set_flags __db_set_flags@DB_VERSION_UNIQUE_NAME@
 #define	__db_get_lorder __db_get_lorder@DB_VERSION_UNIQUE_NAME@
@@ -219,6 +229,7 @@
 #define	__db_chk_meta __db_chk_meta@DB_VERSION_UNIQUE_NAME@
 #define	__db_meta_setup __db_meta_setup@DB_VERSION_UNIQUE_NAME@
 #define	__db_reopen __db_reopen@DB_VERSION_UNIQUE_NAME@
+#define	__db_alloc_dbt __db_alloc_dbt@DB_VERSION_UNIQUE_NAME@
 #define	__db_goff __db_goff@DB_VERSION_UNIQUE_NAME@
 #define	__db_poff __db_poff@DB_VERSION_UNIQUE_NAME@
 #define	__db_ovref __db_ovref@DB_VERSION_UNIQUE_NAME@
@@ -248,6 +259,7 @@
 #define	__db_prfooter __db_prfooter@DB_VERSION_UNIQUE_NAME@
 #define	__db_pr_callback __db_pr_callback@DB_VERSION_UNIQUE_NAME@
 #define	__db_dbtype_to_string __db_dbtype_to_string@DB_VERSION_UNIQUE_NAME@
+#define	__db_tohex __db_tohex@DB_VERSION_UNIQUE_NAME@
 #define	__db_addrem_recover __db_addrem_recover@DB_VERSION_UNIQUE_NAME@
 #define	__db_addrem_42_recover __db_addrem_42_recover@DB_VERSION_UNIQUE_NAME@
 #define	__db_big_recover __db_big_recover@DB_VERSION_UNIQUE_NAME@
@@ -285,6 +297,8 @@
 #define	__db_rename_int __db_rename_int@DB_VERSION_UNIQUE_NAME@
 #define	__db_ret __db_ret@DB_VERSION_UNIQUE_NAME@
 #define	__db_retcopy __db_retcopy@DB_VERSION_UNIQUE_NAME@
+#define	__db_dbt_clone __db_dbt_clone@DB_VERSION_UNIQUE_NAME@
+#define	__db_dbt_clone_free __db_dbt_clone_free@DB_VERSION_UNIQUE_NAME@
 #define	__env_fileid_reset_pp __env_fileid_reset_pp@DB_VERSION_UNIQUE_NAME@
 #define	__env_fileid_reset __env_fileid_reset@DB_VERSION_UNIQUE_NAME@
 #define	__env_lsn_reset_pp __env_lsn_reset_pp@DB_VERSION_UNIQUE_NAME@
@@ -351,6 +365,7 @@
 #define	__part_key_range __part_key_range@DB_VERSION_UNIQUE_NAME@
 #define	__part_remove __part_remove@DB_VERSION_UNIQUE_NAME@
 #define	__part_rename __part_rename@DB_VERSION_UNIQUE_NAME@
+#define	__partc_dup __partc_dup@DB_VERSION_UNIQUE_NAME@
 #define	__part_verify __part_verify@DB_VERSION_UNIQUE_NAME@
 #define	__part_testdocopy __part_testdocopy@DB_VERSION_UNIQUE_NAME@
 #define	__db_no_partition __db_no_partition@DB_VERSION_UNIQUE_NAME@
@@ -361,6 +376,34 @@
 #define	__partition_init __partition_init@DB_VERSION_UNIQUE_NAME@
 #define	__part_fileid_reset __part_fileid_reset@DB_VERSION_UNIQUE_NAME@
 #define	__partition_set_dirs __partition_set_dirs@DB_VERSION_UNIQUE_NAME@
+#define	__blob_file_create __blob_file_create@DB_VERSION_UNIQUE_NAME@
+#define	__blob_file_close __blob_file_close@DB_VERSION_UNIQUE_NAME@
+#define	__blob_file_delete __blob_file_delete@DB_VERSION_UNIQUE_NAME@
+#define	__blob_file_open __blob_file_open@DB_VERSION_UNIQUE_NAME@
+#define	__blob_file_read __blob_file_read@DB_VERSION_UNIQUE_NAME@
+#define	__blob_file_write __blob_file_write@DB_VERSION_UNIQUE_NAME@
+#define	__blob_bulk __blob_bulk@DB_VERSION_UNIQUE_NAME@
+#define	__blob_get __blob_get@DB_VERSION_UNIQUE_NAME@
+#define	__blob_put __blob_put@DB_VERSION_UNIQUE_NAME@
+#define	__blob_repl __blob_repl@DB_VERSION_UNIQUE_NAME@
+#define	__blob_del __blob_del@DB_VERSION_UNIQUE_NAME@
+#define	__db_stream_init __db_stream_init@DB_VERSION_UNIQUE_NAME@
+#define	__db_stream_close_int __db_stream_close_int@DB_VERSION_UNIQUE_NAME@
+#define	__blob_make_sub_dir __blob_make_sub_dir@DB_VERSION_UNIQUE_NAME@
+#define	__blob_make_meta_fname __blob_make_meta_fname@DB_VERSION_UNIQUE_NAME@
+#define	__blob_get_dir __blob_get_dir@DB_VERSION_UNIQUE_NAME@
+#define	__blob_generate_dir_ids __blob_generate_dir_ids@DB_VERSION_UNIQUE_NAME@
+#define	__blob_generate_id __blob_generate_id@DB_VERSION_UNIQUE_NAME@
+#define	__blob_highest_id __blob_highest_id@DB_VERSION_UNIQUE_NAME@
+#define	__blob_calculate_dirs __blob_calculate_dirs@DB_VERSION_UNIQUE_NAME@
+#define	__blob_id_to_path __blob_id_to_path@DB_VERSION_UNIQUE_NAME@
+#define	__blob_str_to_id __blob_str_to_id@DB_VERSION_UNIQUE_NAME@
+#define	__blob_path_to_dir_ids __blob_path_to_dir_ids@DB_VERSION_UNIQUE_NAME@
+#define	__blob_salvage __blob_salvage@DB_VERSION_UNIQUE_NAME@
+#define	__blob_vrfy __blob_vrfy@DB_VERSION_UNIQUE_NAME@
+#define	__blob_del_hierarchy __blob_del_hierarchy@DB_VERSION_UNIQUE_NAME@
+#define	__blob_del_all __blob_del_all@DB_VERSION_UNIQUE_NAME@
+#define	__blob_copy_all __blob_copy_all@DB_VERSION_UNIQUE_NAME@
 #define	__bam_compact_int __bam_compact_int@DB_VERSION_UNIQUE_NAME@
 #define	__bam_compact_opd __bam_compact_opd@DB_VERSION_UNIQUE_NAME@
 #define	__bam_truncate_ipages __bam_truncate_ipages@DB_VERSION_UNIQUE_NAME@
@@ -470,6 +513,8 @@
 #define	__bam_30_btreemeta __bam_30_btreemeta@DB_VERSION_UNIQUE_NAME@
 #define	__bam_31_btreemeta __bam_31_btreemeta@DB_VERSION_UNIQUE_NAME@
 #define	__bam_31_lbtree __bam_31_lbtree@DB_VERSION_UNIQUE_NAME@
+#define	__bam_60_btreemeta __bam_60_btreemeta@DB_VERSION_UNIQUE_NAME@
+#define	__bam_60_lbtree __bam_60_lbtree@DB_VERSION_UNIQUE_NAME@
 #define	__bam_vrfy_meta __bam_vrfy_meta@DB_VERSION_UNIQUE_NAME@
 #define	__ram_vrfy_leaf __ram_vrfy_leaf@DB_VERSION_UNIQUE_NAME@
 #define	__bam_vrfy __bam_vrfy@DB_VERSION_UNIQUE_NAME@
@@ -628,6 +673,7 @@
 #ifdef DIAGNOSTIC
 #define	__db_assert __db_assert@DB_VERSION_UNIQUE_NAME@
 #endif
+#define	__env_panic_event __env_panic_event@DB_VERSION_UNIQUE_NAME@
 #define	__env_panic_msg __env_panic_msg@DB_VERSION_UNIQUE_NAME@
 #define	__env_panic __env_panic@DB_VERSION_UNIQUE_NAME@
 #define	__db_unknown_error __db_unknown_error@DB_VERSION_UNIQUE_NAME@
@@ -639,6 +685,7 @@
 #define	__db_msgadd __db_msgadd@DB_VERSION_UNIQUE_NAME@
 #define	__db_msgadd_ap __db_msgadd_ap@DB_VERSION_UNIQUE_NAME@
 #define	__db_msg __db_msg@DB_VERSION_UNIQUE_NAME@
+#define	__db_debug_msg __db_debug_msg@DB_VERSION_UNIQUE_NAME@
 #define	__db_repmsg __db_repmsg@DB_VERSION_UNIQUE_NAME@
 #define	__db_unknown_flag __db_unknown_flag@DB_VERSION_UNIQUE_NAME@
 #define	__db_unknown_type __db_unknown_type@DB_VERSION_UNIQUE_NAME@
@@ -653,6 +700,24 @@
 #define	__db_rdonly __db_rdonly@DB_VERSION_UNIQUE_NAME@
 #define	__db_space_err __db_space_err@DB_VERSION_UNIQUE_NAME@
 #define	__db_failed __db_failed@DB_VERSION_UNIQUE_NAME@
+#define	__env_failure_remember __env_failure_remember@DB_VERSION_UNIQUE_NAME@
+#ifdef HAVE_ERROR_HISTORY
+#define	__db_thread_init __db_thread_init@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef HAVE_ERROR_HISTORY
+#define	__db_diags __db_diags@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef HAVE_ERROR_HISTORY
+#define	__db_deferred_get __db_deferred_get@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef HAVE_ERROR_HISTORY
+#define	__db_deferred_discard __db_deferred_discard@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef HAVE_ERROR_HISTORY
+#define	__db_remember_context __db_remember_context@DB_VERSION_UNIQUE_NAME@
+#endif
+#define	__db_ctimespec __db_ctimespec@DB_VERSION_UNIQUE_NAME@
+#define	__db_fmt_quote __db_fmt_quote@DB_VERSION_UNIQUE_NAME@
 #define	__db_getlong __db_getlong@DB_VERSION_UNIQUE_NAME@
 #define	__db_getulong __db_getulong@DB_VERSION_UNIQUE_NAME@
 #define	__db_idspace __db_idspace@DB_VERSION_UNIQUE_NAME@
@@ -709,11 +774,14 @@
 #define	__dbreg_failchk __dbreg_failchk@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_log_close __dbreg_log_close@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_log_id __dbreg_log_id@DB_VERSION_UNIQUE_NAME@
+#define	__dbreg_register_42_desc __dbreg_register_42_desc@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_register_desc __dbreg_register_desc@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_init_recover __dbreg_init_recover@DB_VERSION_UNIQUE_NAME@
+#define	__dbreg_register_42_print __dbreg_register_42_print@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_register_print __dbreg_register_print@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_init_print __dbreg_init_print@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_register_recover __dbreg_register_recover@DB_VERSION_UNIQUE_NAME@
+#define	__dbreg_register_42_recover __dbreg_register_42_recover@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_stat_print __dbreg_stat_print@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_print_fname __dbreg_print_fname@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_add_dbentry __dbreg_add_dbentry@DB_VERSION_UNIQUE_NAME@
@@ -727,6 +795,7 @@
 #define	__dbreg_id_to_db __dbreg_id_to_db@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_id_to_fname __dbreg_id_to_fname@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_fid_to_fname __dbreg_fid_to_fname@DB_VERSION_UNIQUE_NAME@
+#define	__dbreg_blob_file_to_fname __dbreg_blob_file_to_fname@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_get_name __dbreg_get_name@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_do_open __dbreg_do_open@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_lazy_id __dbreg_lazy_id@DB_VERSION_UNIQUE_NAME@
@@ -760,9 +829,13 @@
 #define	__env_get_alloc __env_get_alloc@DB_VERSION_UNIQUE_NAME@
 #define	__env_set_alloc __env_set_alloc@DB_VERSION_UNIQUE_NAME@
 #define	__env_get_memory_init __env_get_memory_init@DB_VERSION_UNIQUE_NAME@
+#define	__env_get_blob_threshold_pp __env_get_blob_threshold_pp@DB_VERSION_UNIQUE_NAME@
+#define	__env_get_blob_threshold_int __env_get_blob_threshold_int@DB_VERSION_UNIQUE_NAME@
+#define	__env_set_blob_threshold __env_set_blob_threshold@DB_VERSION_UNIQUE_NAME@
 #define	__env_set_memory_init __env_set_memory_init@DB_VERSION_UNIQUE_NAME@
 #define	__env_get_memory_max __env_get_memory_max@DB_VERSION_UNIQUE_NAME@
 #define	__env_set_memory_max __env_set_memory_max@DB_VERSION_UNIQUE_NAME@
+#define	__env_set_blob_dir __env_set_blob_dir@DB_VERSION_UNIQUE_NAME@
 #define	__env_get_encrypt_flags __env_get_encrypt_flags@DB_VERSION_UNIQUE_NAME@
 #define	__env_set_encrypt __env_set_encrypt@DB_VERSION_UNIQUE_NAME@
 #define	__env_map_flags __env_map_flags@DB_VERSION_UNIQUE_NAME@
@@ -815,6 +888,7 @@
 #define	__env_ref_increment __env_ref_increment@DB_VERSION_UNIQUE_NAME@
 #define	__env_ref_decrement __env_ref_decrement@DB_VERSION_UNIQUE_NAME@
 #define	__env_ref_get __env_ref_get@DB_VERSION_UNIQUE_NAME@
+#define	__env_region_cleanup __env_region_cleanup@DB_VERSION_UNIQUE_NAME@
 #define	__env_detach __env_detach@DB_VERSION_UNIQUE_NAME@
 #define	__env_remove_env __env_remove_env@DB_VERSION_UNIQUE_NAME@
 #define	__env_region_attach __env_region_attach@DB_VERSION_UNIQUE_NAME@
@@ -826,6 +900,7 @@
 #define	__envreg_isalive __envreg_isalive@DB_VERSION_UNIQUE_NAME@
 #define	__env_struct_sig __env_struct_sig@DB_VERSION_UNIQUE_NAME@
 #define	__env_stat_print_pp __env_stat_print_pp@DB_VERSION_UNIQUE_NAME@
+#define	__env_print_thread __env_print_thread@DB_VERSION_UNIQUE_NAME@
 #define	__db_print_fh __db_print_fh@DB_VERSION_UNIQUE_NAME@
 #define	__db_print_fileid __db_print_fileid@DB_VERSION_UNIQUE_NAME@
 #define	__db_dl __db_dl@DB_VERSION_UNIQUE_NAME@
@@ -843,6 +918,18 @@
 #define	__repmgr_set_ack_policy __repmgr_set_ack_policy@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_get_incoming_queue_max __repmgr_get_incoming_queue_max@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_set_incoming_queue_max __repmgr_set_incoming_queue_max@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_get_incoming_queue_redzone __repmgr_get_incoming_queue_redzone@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_get_incoming_queue_fullevent __repmgr_get_incoming_queue_fullevent@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
 #define	__repmgr_site __repmgr_site@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
@@ -852,10 +939,10 @@
 #define	__repmgr_local_site __repmgr_local_site@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-#define	__repmgr_site_list __repmgr_site_list@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_site_list_pp __repmgr_site_list_pp@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-#define	__repmgr_start __repmgr_start@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_start_pp __repmgr_start_pp@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
 #define	__repmgr_stat_pp __repmgr_stat_pp@DB_VERSION_UNIQUE_NAME@
@@ -876,39 +963,63 @@
 #define	__repmgr_init_recover __repmgr_init_recover@DB_VERSION_UNIQUE_NAME@
 #endif
 #define	__fop_create_42_desc __fop_create_42_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_create_60_desc __fop_create_60_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_desc __fop_create_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_remove_60_desc __fop_remove_60_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_remove_desc __fop_remove_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_42_desc __fop_write_42_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_60_desc __fop_write_60_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_desc __fop_write_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_60_desc __fop_write_file_60_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_desc __fop_write_file_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_42_desc __fop_rename_42_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_noundo_46_desc __fop_rename_noundo_46_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_rename_60_desc __fop_rename_60_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_rename_noundo_60_desc __fop_rename_noundo_60_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_desc __fop_rename_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_noundo_desc __fop_rename_noundo_desc@DB_VERSION_UNIQUE_NAME@
+#define	__fop_file_remove_60_desc __fop_file_remove_60_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_file_remove_desc __fop_file_remove_desc@DB_VERSION_UNIQUE_NAME@
 #define	__fop_init_recover __fop_init_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_42_print __fop_create_42_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_create_60_print __fop_create_60_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_print __fop_create_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_remove_60_print __fop_remove_60_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_remove_print __fop_remove_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_42_print __fop_write_42_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_60_print __fop_write_60_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_print __fop_write_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_60_print __fop_write_file_60_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_print __fop_write_file_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_42_print __fop_rename_42_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_rename_60_print __fop_rename_60_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_print __fop_rename_print@DB_VERSION_UNIQUE_NAME@
+#define	__fop_file_remove_60_print __fop_file_remove_60_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_file_remove_print __fop_file_remove_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_init_print __fop_init_print@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create __fop_create@DB_VERSION_UNIQUE_NAME@
 #define	__fop_remove __fop_remove@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write __fop_write@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file __fop_write_file@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename __fop_rename@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_recover __fop_create_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_create_60_recover __fop_create_60_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_42_recover __fop_create_42_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_remove_recover __fop_remove_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_remove_60_recover __fop_remove_60_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_recover __fop_write_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_60_recover __fop_write_60_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_42_recover __fop_write_42_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_recover __fop_write_file_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_60_recover __fop_write_file_60_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_recover __fop_rename_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_noundo_recover __fop_rename_noundo_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_rename_60_recover __fop_rename_60_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_rename_noundo_60_recover __fop_rename_noundo_60_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_42_recover __fop_rename_42_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_noundo_46_recover __fop_rename_noundo_46_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_file_remove_recover __fop_file_remove_recover@DB_VERSION_UNIQUE_NAME@
+#define	__fop_file_remove_60_recover __fop_file_remove_60_recover@DB_VERSION_UNIQUE_NAME@
 #define	__fop_lock_handle __fop_lock_handle@DB_VERSION_UNIQUE_NAME@
 #define	__fop_file_setup __fop_file_setup@DB_VERSION_UNIQUE_NAME@
 #define	__fop_subdb_setup __fop_subdb_setup@DB_VERSION_UNIQUE_NAME@
@@ -1041,6 +1152,8 @@
 #define	__ham_31_hash __ham_31_hash@DB_VERSION_UNIQUE_NAME@
 #define	__ham_46_hashmeta __ham_46_hashmeta@DB_VERSION_UNIQUE_NAME@
 #define	__ham_46_hash __ham_46_hash@DB_VERSION_UNIQUE_NAME@
+#define	__ham_60_hashmeta __ham_60_hashmeta@DB_VERSION_UNIQUE_NAME@
+#define	__ham_60_hash __ham_60_hash@DB_VERSION_UNIQUE_NAME@
 #define	__ham_vrfy_meta __ham_vrfy_meta@DB_VERSION_UNIQUE_NAME@
 #define	__ham_vrfy __ham_vrfy@DB_VERSION_UNIQUE_NAME@
 #define	__ham_vrfy_structure __ham_vrfy_structure@DB_VERSION_UNIQUE_NAME@
@@ -1055,11 +1168,15 @@
 #define	__heapc_gsplit __heapc_gsplit@DB_VERSION_UNIQUE_NAME@
 #define	__heapc_refresh __heapc_refresh@DB_VERSION_UNIQUE_NAME@
 #define	__heap_addrem_desc __heap_addrem_desc@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_60_desc __heap_addrem_60_desc@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_50_desc __heap_addrem_50_desc@DB_VERSION_UNIQUE_NAME@
 #define	__heap_pg_alloc_desc __heap_pg_alloc_desc@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_meta_desc __heap_trunc_meta_desc@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_page_desc __heap_trunc_page_desc@DB_VERSION_UNIQUE_NAME@
 #define	__heap_init_recover __heap_init_recover@DB_VERSION_UNIQUE_NAME@
 #define	__heap_addrem_print __heap_addrem_print@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_60_print __heap_addrem_60_print@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_50_print __heap_addrem_50_print@DB_VERSION_UNIQUE_NAME@
 #define	__heap_pg_alloc_print __heap_pg_alloc_print@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_meta_print __heap_trunc_meta_print@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_page_print __heap_trunc_page_print@DB_VERSION_UNIQUE_NAME@
@@ -1084,6 +1201,8 @@
 #define	__heap_pg_alloc_recover __heap_pg_alloc_recover@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_meta_recover __heap_trunc_meta_recover@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_page_recover __heap_trunc_page_recover@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_60_recover __heap_addrem_60_recover@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_50_recover __heap_addrem_50_recover@DB_VERSION_UNIQUE_NAME@
 #define	__heap_truncate __heap_truncate@DB_VERSION_UNIQUE_NAME@
 #define	__heap_stat __heap_stat@DB_VERSION_UNIQUE_NAME@
 #define	__heap_stat_print __heap_stat_print@DB_VERSION_UNIQUE_NAME@
@@ -1091,6 +1210,8 @@
 #define	__heap_stat_callback __heap_stat_callback@DB_VERSION_UNIQUE_NAME@
 #define	__heap_traverse __heap_traverse@DB_VERSION_UNIQUE_NAME@
 #define	__db_no_heap_am __db_no_heap_am@DB_VERSION_UNIQUE_NAME@
+#define	__heap_60_heapmeta __heap_60_heapmeta@DB_VERSION_UNIQUE_NAME@
+#define	__heap_60_heap __heap_60_heap@DB_VERSION_UNIQUE_NAME@
 #define	__heap_vrfy_meta __heap_vrfy_meta@DB_VERSION_UNIQUE_NAME@
 #define	__heap_vrfy __heap_vrfy@DB_VERSION_UNIQUE_NAME@
 #define	__heap_vrfy_structure __heap_vrfy_structure@DB_VERSION_UNIQUE_NAME@
@@ -1129,6 +1250,7 @@
 #define	__lock_addfamilylocker __lock_addfamilylocker@DB_VERSION_UNIQUE_NAME@
 #define	__lock_freelocker __lock_freelocker@DB_VERSION_UNIQUE_NAME@
 #define	__lock_familyremove __lock_familyremove@DB_VERSION_UNIQUE_NAME@
+#define	__lock_local_locker_invalidate __lock_local_locker_invalidate@DB_VERSION_UNIQUE_NAME@
 #define	__lock_fix_list __lock_fix_list@DB_VERSION_UNIQUE_NAME@
 #define	__lock_get_list __lock_get_list@DB_VERSION_UNIQUE_NAME@
 #define	__lock_list_print __lock_list_print@DB_VERSION_UNIQUE_NAME@
@@ -1154,6 +1276,7 @@
 #define	__lock_set_env_timeout __lock_set_env_timeout@DB_VERSION_UNIQUE_NAME@
 #define	__lock_open __lock_open@DB_VERSION_UNIQUE_NAME@
 #define	__lock_env_refresh __lock_env_refresh@DB_VERSION_UNIQUE_NAME@
+#define	__lock_region_detach __lock_region_detach@DB_VERSION_UNIQUE_NAME@
 #define	__lock_region_mutex_count __lock_region_mutex_count@DB_VERSION_UNIQUE_NAME@
 #define	__lock_region_mutex_max __lock_region_mutex_max@DB_VERSION_UNIQUE_NAME@
 #define	__lock_region_max __lock_region_max@DB_VERSION_UNIQUE_NAME@
@@ -1162,6 +1285,7 @@
 #define	__lock_stat_print_pp __lock_stat_print_pp@DB_VERSION_UNIQUE_NAME@
 #define	__lock_stat_print __lock_stat_print@DB_VERSION_UNIQUE_NAME@
 #define	__lock_printlock __lock_printlock@DB_VERSION_UNIQUE_NAME@
+#define	__lock_dump_locker __lock_dump_locker@DB_VERSION_UNIQUE_NAME@
 #define	__lock_set_timeout __lock_set_timeout@DB_VERSION_UNIQUE_NAME@
 #define	__lock_set_timeout_internal __lock_set_timeout_internal@DB_VERSION_UNIQUE_NAME@
 #define	__lock_inherit_timeout __lock_inherit_timeout@DB_VERSION_UNIQUE_NAME@
@@ -1169,6 +1293,7 @@
 #define	__lock_lhash __lock_lhash@DB_VERSION_UNIQUE_NAME@
 #define	__lock_nomem __lock_nomem@DB_VERSION_UNIQUE_NAME@
 #define	__log_open __log_open@DB_VERSION_UNIQUE_NAME@
+#define	__log_region_detach __log_region_detach@DB_VERSION_UNIQUE_NAME@
 #define	__log_find __log_find@DB_VERSION_UNIQUE_NAME@
 #define	__log_valid __log_valid@DB_VERSION_UNIQUE_NAME@
 #define	__log_env_refresh __log_env_refresh@DB_VERSION_UNIQUE_NAME@
@@ -1234,6 +1359,7 @@
 #define	__log_file_pp __log_file_pp@DB_VERSION_UNIQUE_NAME@
 #define	__log_name __log_name@DB_VERSION_UNIQUE_NAME@
 #define	__log_rep_put __log_rep_put@DB_VERSION_UNIQUE_NAME@
+#define	__log_rep_write __log_rep_write@DB_VERSION_UNIQUE_NAME@
 #define	__log_put_record_pp __log_put_record_pp@DB_VERSION_UNIQUE_NAME@
 #define	__log_put_record __log_put_record@DB_VERSION_UNIQUE_NAME@
 #define	__log_stat_pp __log_stat_pp@DB_VERSION_UNIQUE_NAME@
@@ -1277,6 +1403,7 @@
 #define	__db_merge_verify __db_merge_verify@DB_VERSION_UNIQUE_NAME@
 #define	__db_pgno_verify __db_pgno_verify@DB_VERSION_UNIQUE_NAME@
 #define	__dbreg_register_verify __dbreg_register_verify@DB_VERSION_UNIQUE_NAME@
+#define	__dbreg_register_42_verify __dbreg_register_42_verify@DB_VERSION_UNIQUE_NAME@
 #define	__bam_split_verify __bam_split_verify@DB_VERSION_UNIQUE_NAME@
 #define	__bam_split_42_verify __bam_split_42_verify@DB_VERSION_UNIQUE_NAME@
 #define	__bam_rsplit_verify __bam_rsplit_verify@DB_VERSION_UNIQUE_NAME@
@@ -1291,12 +1418,19 @@
 #define	__bam_relink_43_verify __bam_relink_43_verify@DB_VERSION_UNIQUE_NAME@
 #define	__bam_merge_44_verify __bam_merge_44_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_42_verify __fop_create_42_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_create_60_verify __fop_create_60_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_create_verify __fop_create_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_remove_60_verify __fop_remove_60_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_remove_verify __fop_remove_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_42_verify __fop_write_42_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_60_verify __fop_write_60_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_write_verify __fop_write_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_60_verify __fop_write_file_60_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_write_file_verify __fop_write_file_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_42_verify __fop_rename_42_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_rename_60_verify __fop_rename_60_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_rename_verify __fop_rename_verify@DB_VERSION_UNIQUE_NAME@
+#define	__fop_file_remove_60_verify __fop_file_remove_60_verify@DB_VERSION_UNIQUE_NAME@
 #define	__fop_file_remove_verify __fop_file_remove_verify@DB_VERSION_UNIQUE_NAME@
 #define	__ham_insdel_verify __ham_insdel_verify@DB_VERSION_UNIQUE_NAME@
 #define	__ham_newpage_verify __ham_newpage_verify@DB_VERSION_UNIQUE_NAME@
@@ -1312,6 +1446,7 @@
 #define	__ham_curadj_verify __ham_curadj_verify@DB_VERSION_UNIQUE_NAME@
 #define	__ham_chgpg_verify __ham_chgpg_verify@DB_VERSION_UNIQUE_NAME@
 #define	__heap_addrem_verify __heap_addrem_verify@DB_VERSION_UNIQUE_NAME@
+#define	__heap_addrem_60_verify __heap_addrem_60_verify@DB_VERSION_UNIQUE_NAME@
 #define	__heap_pg_alloc_verify __heap_pg_alloc_verify@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_meta_verify __heap_trunc_meta_verify@DB_VERSION_UNIQUE_NAME@
 #define	__heap_trunc_page_verify __heap_trunc_page_verify@DB_VERSION_UNIQUE_NAME@
@@ -1363,6 +1498,7 @@
 #define	__del_txn_pages __del_txn_pages@DB_VERSION_UNIQUE_NAME@
 #define	__is_ancestor_txn __is_ancestor_txn@DB_VERSION_UNIQUE_NAME@
 #define	__return_txn_pages __return_txn_pages@DB_VERSION_UNIQUE_NAME@
+#define	__memp_bh_unreachable __memp_bh_unreachable@DB_VERSION_UNIQUE_NAME@
 #define	__memp_alloc __memp_alloc@DB_VERSION_UNIQUE_NAME@
 #define	__memp_free __memp_free@DB_VERSION_UNIQUE_NAME@
 #define	__memp_backup_open __memp_backup_open@DB_VERSION_UNIQUE_NAME@
@@ -1375,6 +1511,7 @@
 #define	__memp_bhfree __memp_bhfree@DB_VERSION_UNIQUE_NAME@
 #define	__memp_fget_pp __memp_fget_pp@DB_VERSION_UNIQUE_NAME@
 #define	__memp_fget __memp_fget@DB_VERSION_UNIQUE_NAME@
+#define	__memp_find_obsolete_version __memp_find_obsolete_version@DB_VERSION_UNIQUE_NAME@
 #define	__memp_fcreate_pp __memp_fcreate_pp@DB_VERSION_UNIQUE_NAME@
 #define	__memp_fcreate __memp_fcreate@DB_VERSION_UNIQUE_NAME@
 #define	__memp_set_clear_len __memp_set_clear_len@DB_VERSION_UNIQUE_NAME@
@@ -1385,6 +1522,7 @@
 #define	__memp_get_ftype __memp_get_ftype@DB_VERSION_UNIQUE_NAME@
 #define	__memp_set_ftype __memp_set_ftype@DB_VERSION_UNIQUE_NAME@
 #define	__memp_set_lsn_offset __memp_set_lsn_offset@DB_VERSION_UNIQUE_NAME@
+#define	__memp_set_maxpgno __memp_set_maxpgno@DB_VERSION_UNIQUE_NAME@
 #define	__memp_get_pgcookie __memp_get_pgcookie@DB_VERSION_UNIQUE_NAME@
 #define	__memp_set_pgcookie __memp_set_pgcookie@DB_VERSION_UNIQUE_NAME@
 #define	__memp_get_priority __memp_get_priority@DB_VERSION_UNIQUE_NAME@
@@ -1432,10 +1570,12 @@
 #define	__memp_bh_freeze __memp_bh_freeze@DB_VERSION_UNIQUE_NAME@
 #define	__memp_bh_thaw __memp_bh_thaw@DB_VERSION_UNIQUE_NAME@
 #define	__memp_open __memp_open@DB_VERSION_UNIQUE_NAME@
+#define	__memp_region_detach __memp_region_detach@DB_VERSION_UNIQUE_NAME@
 #define	__memp_init __memp_init@DB_VERSION_UNIQUE_NAME@
 #define	__memp_max_regions __memp_max_regions@DB_VERSION_UNIQUE_NAME@
 #define	__memp_region_mutex_count __memp_region_mutex_count@DB_VERSION_UNIQUE_NAME@
 #define	__memp_env_refresh __memp_env_refresh@DB_VERSION_UNIQUE_NAME@
+#define	__memp_region_bhfree __memp_region_bhfree@DB_VERSION_UNIQUE_NAME@
 #define	__memp_register_pp __memp_register_pp@DB_VERSION_UNIQUE_NAME@
 #define	__memp_register __memp_register@DB_VERSION_UNIQUE_NAME@
 #define	__memp_get_bucket __memp_get_bucket@DB_VERSION_UNIQUE_NAME@
@@ -1460,13 +1600,13 @@
 #define	__mutex_alloc_int __mutex_alloc_int@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_free __mutex_free@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_free_int __mutex_free_int@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_died __mutex_died@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_refresh __mutex_refresh@DB_VERSION_UNIQUE_NAME@
-#define	__mut_failchk __mut_failchk@DB_VERSION_UNIQUE_NAME@
-#define	__db_fcntl_mutex_init __db_fcntl_mutex_init@DB_VERSION_UNIQUE_NAME@
-#define	__db_fcntl_mutex_lock __db_fcntl_mutex_lock@DB_VERSION_UNIQUE_NAME@
-#define	__db_fcntl_mutex_trylock __db_fcntl_mutex_trylock@DB_VERSION_UNIQUE_NAME@
-#define	__db_fcntl_mutex_unlock __db_fcntl_mutex_unlock@DB_VERSION_UNIQUE_NAME@
-#define	__db_fcntl_mutex_destroy __db_fcntl_mutex_destroy@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_record_lock __mutex_record_lock@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_record_unlock __mutex_record_unlock@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_record_print __mutex_record_print@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_failchk __mutex_failchk@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_failchk_thread __mutex_failchk_thread@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_alloc_pp __mutex_alloc_pp@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_free_pp __mutex_free_pp@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_lock_pp __mutex_lock_pp@DB_VERSION_UNIQUE_NAME@
@@ -1481,6 +1621,9 @@
 #define	__mutex_set_max __mutex_set_max@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_get_tas_spins __mutex_get_tas_spins@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_set_tas_spins __mutex_set_tas_spins@DB_VERSION_UNIQUE_NAME@
+#ifdef HAVE_ERROR_HISTORY
+#define	__mutex_diags __mutex_diags@DB_VERSION_UNIQUE_NAME@
+#endif
 #if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT)
 #define	__atomic_inc __atomic_inc@DB_VERSION_UNIQUE_NAME@
 #endif
@@ -1503,6 +1646,7 @@
 #define	__db_pthread_mutex_unlock __db_pthread_mutex_unlock@DB_VERSION_UNIQUE_NAME@
 #define	__db_pthread_mutex_destroy __db_pthread_mutex_destroy@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_open __mutex_open@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_region_detach __mutex_region_detach@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_env_refresh __mutex_env_refresh@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_resource_return __mutex_resource_return@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_stat_pp __mutex_stat_pp@DB_VERSION_UNIQUE_NAME@
@@ -1512,6 +1656,7 @@
 #define	__mutex_print_debug_stats __mutex_print_debug_stats@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_set_wait_info __mutex_set_wait_info@DB_VERSION_UNIQUE_NAME@
 #define	__mutex_clear __mutex_clear@DB_VERSION_UNIQUE_NAME@
+#define	__mutex_describe __mutex_describe@DB_VERSION_UNIQUE_NAME@
 #define	__db_tas_mutex_init __db_tas_mutex_init@DB_VERSION_UNIQUE_NAME@
 #define	__db_tas_mutex_lock __db_tas_mutex_lock@DB_VERSION_UNIQUE_NAME@
 #define	__db_tas_mutex_trylock __db_tas_mutex_trylock@DB_VERSION_UNIQUE_NAME@
@@ -1582,6 +1727,7 @@
 #define	__os_concat_path __os_concat_path@DB_VERSION_UNIQUE_NAME@
 #define	__os_id __os_id@DB_VERSION_UNIQUE_NAME@
 #define	__os_rename __os_rename@DB_VERSION_UNIQUE_NAME@
+#define	__os_rmdir __os_rmdir@DB_VERSION_UNIQUE_NAME@
 #define	__os_isroot __os_isroot@DB_VERSION_UNIQUE_NAME@
 #define	__db_rpath __db_rpath@DB_VERSION_UNIQUE_NAME@
 #define	__os_io __os_io@DB_VERSION_UNIQUE_NAME@
@@ -1590,16 +1736,37 @@
 #define	__os_physwrite __os_physwrite@DB_VERSION_UNIQUE_NAME@
 #define	__os_seek __os_seek@DB_VERSION_UNIQUE_NAME@
 #define	__os_stack __os_stack@DB_VERSION_UNIQUE_NAME@
+#define	__os_stack_top __os_stack_top@DB_VERSION_UNIQUE_NAME@
+#define	__os_stack_text __os_stack_text@DB_VERSION_UNIQUE_NAME@
+#define	__os_stack_save __os_stack_save@DB_VERSION_UNIQUE_NAME@
+#define	__os_stack_msgadd __os_stack_msgadd@DB_VERSION_UNIQUE_NAME@
 #define	__os_exists __os_exists@DB_VERSION_UNIQUE_NAME@
 #define	__os_ioinfo __os_ioinfo@DB_VERSION_UNIQUE_NAME@
 #define	__os_tmpdir __os_tmpdir@DB_VERSION_UNIQUE_NAME@
 #define	__os_truncate __os_truncate@DB_VERSION_UNIQUE_NAME@
 #define	__os_unique_id __os_unique_id@DB_VERSION_UNIQUE_NAME@
+#define	__os_srandom __os_srandom@DB_VERSION_UNIQUE_NAME@
+#define	__os_random __os_random@DB_VERSION_UNIQUE_NAME@
 #define	__os_unlink __os_unlink@DB_VERSION_UNIQUE_NAME@
 #define	__os_yield __os_yield@DB_VERSION_UNIQUE_NAME@
 #ifdef HAVE_QNX
 #define	__os_qnx_region_open __os_qnx_region_open@DB_VERSION_UNIQUE_NAME@
 #endif
+#ifdef DB_WINCE
+#define	__ce_freopen __ce_freopen@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef DB_WINCE
+#define	__ce_gmtime __ce_gmtime@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef DB_WINCE
+#define	localtime localtime@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef DB_WINCE
+#define	__ce_mktime __ce_mktime@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifdef DB_WINCE
+#define	__ce_remove __ce_remove@DB_VERSION_UNIQUE_NAME@
+#endif
 #define	__os_is_winnt __os_is_winnt@DB_VERSION_UNIQUE_NAME@
 #define	__os_cpu_count __os_cpu_count@DB_VERSION_UNIQUE_NAME@
 #ifdef HAVE_REPLICATION_THREADS
@@ -1673,6 +1840,8 @@
 #define	__rep_egen_unmarshal __rep_egen_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_fileinfo_marshal __rep_fileinfo_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_fileinfo_unmarshal __rep_fileinfo_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_fileinfo_v7_marshal __rep_fileinfo_v7_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_fileinfo_v7_unmarshal __rep_fileinfo_v7_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_fileinfo_v6_marshal __rep_fileinfo_v6_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_fileinfo_v6_unmarshal __rep_fileinfo_v6_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_grant_info_marshal __rep_grant_info_marshal@DB_VERSION_UNIQUE_NAME@
@@ -1691,13 +1860,29 @@
 #define	__rep_lsn_hist_key_unmarshal __rep_lsn_hist_key_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_lsn_hist_data_marshal __rep_lsn_hist_data_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_lsn_hist_data_unmarshal __rep_lsn_hist_data_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_update_req_marshal __rep_blob_update_req_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_update_req_unmarshal __rep_blob_update_req_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_update_marshal __rep_blob_update_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_update_unmarshal __rep_blob_update_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_file_marshal __rep_blob_file_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_file_unmarshal __rep_blob_file_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_chunk_marshal __rep_blob_chunk_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_chunk_unmarshal __rep_blob_chunk_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_chunk_req_marshal __rep_blob_chunk_req_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_chunk_req_unmarshal __rep_blob_chunk_req_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__rep_update_req __rep_update_req@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_update_req __rep_blob_update_req@DB_VERSION_UNIQUE_NAME@
 #define	__rep_page_req __rep_page_req@DB_VERSION_UNIQUE_NAME@
 #define	__rep_update_setup __rep_update_setup@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_update __rep_blob_update@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_allreq __rep_blob_allreq@DB_VERSION_UNIQUE_NAME@
 #define	__rep_bulk_page __rep_bulk_page@DB_VERSION_UNIQUE_NAME@
 #define	__rep_page __rep_page@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_chunk __rep_blob_chunk@DB_VERSION_UNIQUE_NAME@
 #define	__rep_init_cleanup __rep_init_cleanup@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_chunk_req __rep_blob_chunk_req@DB_VERSION_UNIQUE_NAME@
 #define	__rep_pggap_req __rep_pggap_req@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_rereq __rep_blob_rereq@DB_VERSION_UNIQUE_NAME@
 #define	__rep_finfo_alloc __rep_finfo_alloc@DB_VERSION_UNIQUE_NAME@
 #define	__rep_remove_init_file __rep_remove_init_file@DB_VERSION_UNIQUE_NAME@
 #define	__rep_reset_init __rep_reset_init@DB_VERSION_UNIQUE_NAME@
@@ -1727,24 +1912,32 @@
 #define	__rep_start_int __rep_start_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_open_sysdb __rep_open_sysdb@DB_VERSION_UNIQUE_NAME@
 #define	__rep_client_dbinit __rep_client_dbinit@DB_VERSION_UNIQUE_NAME@
+#define	__rep_blob_cmp __rep_blob_cmp@DB_VERSION_UNIQUE_NAME@
+#define	__rep_offset_cmp __rep_offset_cmp@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_limit __rep_get_limit@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_limit __rep_set_limit@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_nsites_pp __rep_set_nsites_pp@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_nsites_int __rep_set_nsites_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_nsites __rep_get_nsites@DB_VERSION_UNIQUE_NAME@
-#define	__rep_set_priority __rep_set_priority@DB_VERSION_UNIQUE_NAME@
+#define	__rep_set_priority_pp __rep_set_priority_pp@DB_VERSION_UNIQUE_NAME@
+#define	__rep_set_priority_int __rep_set_priority_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_priority __rep_get_priority@DB_VERSION_UNIQUE_NAME@
-#define	__rep_set_timeout __rep_set_timeout@DB_VERSION_UNIQUE_NAME@
+#define	__rep_set_timeout_pp __rep_set_timeout_pp@DB_VERSION_UNIQUE_NAME@
+#define	__rep_set_timeout_int __rep_set_timeout_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_timeout __rep_get_timeout@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_request __rep_get_request@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_request __rep_set_request@DB_VERSION_UNIQUE_NAME@
+#define	__rep_set_view __rep_set_view@DB_VERSION_UNIQUE_NAME@
+#define	__rep_call_partial __rep_call_partial@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_transport_pp __rep_set_transport_pp@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_transport_int __rep_set_transport_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_clockskew __rep_get_clockskew@DB_VERSION_UNIQUE_NAME@
 #define	__rep_set_clockskew __rep_set_clockskew@DB_VERSION_UNIQUE_NAME@
-#define	__rep_flush __rep_flush@DB_VERSION_UNIQUE_NAME@
+#define	__rep_flush_pp __rep_flush_pp@DB_VERSION_UNIQUE_NAME@
+#define	__rep_flush_int __rep_flush_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_sync __rep_sync@DB_VERSION_UNIQUE_NAME@
 #define	__rep_txn_applied __rep_txn_applied@DB_VERSION_UNIQUE_NAME@
+#define	__rep_read_lsn_history __rep_read_lsn_history@DB_VERSION_UNIQUE_NAME@
 #define	__rep_process_message_pp __rep_process_message_pp@DB_VERSION_UNIQUE_NAME@
 #define	__rep_process_message_int __rep_process_message_int@DB_VERSION_UNIQUE_NAME@
 #define	__rep_apply __rep_apply@DB_VERSION_UNIQUE_NAME@
@@ -1760,6 +1953,7 @@
 #define	__rep_closefiles __rep_closefiles@DB_VERSION_UNIQUE_NAME@
 #define	__rep_write_egen __rep_write_egen@DB_VERSION_UNIQUE_NAME@
 #define	__rep_write_gen __rep_write_gen@DB_VERSION_UNIQUE_NAME@
+#define	__rep_check_view __rep_check_view@DB_VERSION_UNIQUE_NAME@
 #define	__rep_stat_pp __rep_stat_pp@DB_VERSION_UNIQUE_NAME@
 #define	__rep_stat_print_pp __rep_stat_print_pp@DB_VERSION_UNIQUE_NAME@
 #define	__rep_stat_print __rep_stat_print@DB_VERSION_UNIQUE_NAME@
@@ -1798,6 +1992,8 @@
 #define	__rep_get_maxpermlsn __rep_get_maxpermlsn@DB_VERSION_UNIQUE_NAME@
 #define	__rep_is_internal_rep_file __rep_is_internal_rep_file@DB_VERSION_UNIQUE_NAME@
 #define	__rep_get_datagen __rep_get_datagen@DB_VERSION_UNIQUE_NAME@
+#define	__rep_become_readonly_master __rep_become_readonly_master@DB_VERSION_UNIQUE_NAME@
+#define	__rep_get_lsnhist_data __rep_get_lsnhist_data@DB_VERSION_UNIQUE_NAME@
 #define	__rep_verify __rep_verify@DB_VERSION_UNIQUE_NAME@
 #define	__rep_verify_fail __rep_verify_fail@DB_VERSION_UNIQUE_NAME@
 #define	__rep_verify_req __rep_verify_req@DB_VERSION_UNIQUE_NAME@
@@ -1827,6 +2023,8 @@
 #define	__repmgr_membership_key_unmarshal __repmgr_membership_key_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_membership_data_marshal __repmgr_membership_data_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_membership_data_unmarshal __repmgr_membership_data_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_v4membership_data_marshal __repmgr_v4membership_data_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_v4membership_data_unmarshal __repmgr_v4membership_data_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_member_metadata_marshal __repmgr_member_metadata_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_member_metadata_unmarshal __repmgr_member_metadata_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_gm_fwd_marshal __repmgr_gm_fwd_marshal@DB_VERSION_UNIQUE_NAME@
@@ -1835,21 +2033,34 @@
 #define	__repmgr_membr_vers_unmarshal __repmgr_membr_vers_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_site_info_marshal __repmgr_site_info_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_site_info_unmarshal __repmgr_site_info_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_v4site_info_marshal __repmgr_v4site_info_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_v4site_info_unmarshal __repmgr_v4site_info_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_connect_reject_marshal __repmgr_connect_reject_marshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_connect_reject_unmarshal __repmgr_connect_reject_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_v4connect_reject_marshal __repmgr_v4connect_reject_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_v4connect_reject_unmarshal __repmgr_v4connect_reject_unmarshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_lsnhist_match_marshal __repmgr_lsnhist_match_marshal@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_lsnhist_match_unmarshal __repmgr_lsnhist_match_unmarshal@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_member_print __repmgr_member_print@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_init_print __repmgr_init_print@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_init_election __repmgr_init_election@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_claim_victory __repmgr_claim_victory@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_turn_on_elections __repmgr_turn_on_elections@DB_VERSION_UNIQUE_NAME@
-#define	__repmgr_start __repmgr_start@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_start_pp __repmgr_start_pp@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_start_int __repmgr_start_int@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_valid_config __repmgr_valid_config@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_prefmas_auto_config __repmgr_prefmas_auto_config@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_autostart __repmgr_autostart@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_start_selector __repmgr_start_selector@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_close __repmgr_close@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_stop __repmgr_stop@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_set_ack_policy __repmgr_set_ack_policy@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_get_ack_policy __repmgr_get_ack_policy@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_set_incoming_queue_max __repmgr_set_incoming_queue_max@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_get_incoming_queue_max __repmgr_get_incoming_queue_max@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_set_incoming_queue_redzone __repmgr_set_incoming_queue_redzone@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_get_incoming_queue_redzone __repmgr_get_incoming_queue_redzone@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_get_incoming_queue_fullevent __repmgr_get_incoming_queue_fullevent@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_env_create __repmgr_env_create@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_env_destroy __repmgr_env_destroy@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_stop_threads __repmgr_stop_threads@DB_VERSION_UNIQUE_NAME@
@@ -1870,7 +2081,8 @@
 #define	__repmgr_get_site_address __repmgr_get_site_address@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_get_eid __repmgr_get_eid@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_get_config __repmgr_get_config@DB_VERSION_UNIQUE_NAME@
-#define	__repmgr_site_config __repmgr_site_config@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_site_config_pp __repmgr_site_config_pp@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_site_config_int __repmgr_site_config_int@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_site_close __repmgr_site_close@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_msg_thread __repmgr_msg_thread@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_send_err_resp __repmgr_send_err_resp@DB_VERSION_UNIQUE_NAME@
@@ -1930,7 +2142,6 @@
 #define	__repmgr_queue_destroy __repmgr_queue_destroy@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_queue_get __repmgr_queue_get@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_queue_put __repmgr_queue_put@DB_VERSION_UNIQUE_NAME@
-#define	__repmgr_queue_size __repmgr_queue_size@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_member_recover __repmgr_member_recover@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_select_thread __repmgr_select_thread@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_bow_out __repmgr_bow_out@DB_VERSION_UNIQUE_NAME@
@@ -1938,6 +2149,7 @@
 #define	__repmgr_compute_timeout __repmgr_compute_timeout@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_connected_master __repmgr_connected_master@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_check_timeouts __repmgr_check_timeouts@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_refresh_selector __repmgr_refresh_selector@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_first_try_connections __repmgr_first_try_connections@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_send_v1_handshake __repmgr_send_v1_handshake@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_read_from_site __repmgr_read_from_site@DB_VERSION_UNIQUE_NAME@
@@ -1949,7 +2161,8 @@
 #define	__repmgr_stat_pp __repmgr_stat_pp@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_stat_print_pp __repmgr_stat_print_pp@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_stat_print __repmgr_stat_print@DB_VERSION_UNIQUE_NAME@
-#define	__repmgr_site_list __repmgr_site_list@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_site_list_pp __repmgr_site_list_pp@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_site_list_int __repmgr_site_list_int@DB_VERSION_UNIQUE_NAME@
 #ifndef HAVE_REPLICATION_THREADS
 #define	__repmgr_close __repmgr_close@DB_VERSION_UNIQUE_NAME@
 #endif
@@ -1960,6 +2173,18 @@
 #define	__repmgr_set_ack_policy __repmgr_set_ack_policy@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_get_incoming_queue_max __repmgr_get_incoming_queue_max@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_set_incoming_queue_max __repmgr_set_incoming_queue_max@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_get_incoming_queue_redzone __repmgr_get_incoming_queue_redzone@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+#define	__repmgr_get_incoming_queue_fullevent __repmgr_get_incoming_queue_fullevent@DB_VERSION_UNIQUE_NAME@
+#endif
+#ifndef HAVE_REPLICATION_THREADS
 #define	__repmgr_site __repmgr_site@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
@@ -1969,10 +2194,10 @@
 #define	__repmgr_local_site __repmgr_local_site@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-#define	__repmgr_site_list __repmgr_site_list@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_site_list_pp __repmgr_site_list_pp@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-#define	__repmgr_start __repmgr_start@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_start_pp __repmgr_start_pp@DB_VERSION_UNIQUE_NAME@
 #endif
 #ifndef HAVE_REPLICATION_THREADS
 #define	__repmgr_stat_pp __repmgr_stat_pp@DB_VERSION_UNIQUE_NAME@
@@ -2023,7 +2248,14 @@
 #define	__repmgr_failchk __repmgr_failchk@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_master_is_known __repmgr_master_is_known@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_stable_lsn __repmgr_stable_lsn@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_make_request_conn __repmgr_make_request_conn@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_send_sync_msg __repmgr_send_sync_msg@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_read_own_msg __repmgr_read_own_msg@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_prefmas_connected __repmgr_prefmas_connected@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_restart_site_as_client __repmgr_restart_site_as_client@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_make_site_readonly_master __repmgr_make_site_readonly_master@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_lsnhist_match __repmgr_lsnhist_match@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_prefmas_get_wait __repmgr_prefmas_get_wait@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_marshal_member_list __repmgr_marshal_member_list@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_refresh_membership __repmgr_refresh_membership@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_reload_gmdb __repmgr_reload_gmdb@DB_VERSION_UNIQUE_NAME@
@@ -2040,10 +2272,15 @@
 #define	__repmgr_bcast_parm_refresh __repmgr_bcast_parm_refresh@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_chg_prio __repmgr_chg_prio@DB_VERSION_UNIQUE_NAME@
 #define	__repmgr_bcast_own_msg __repmgr_bcast_own_msg@DB_VERSION_UNIQUE_NAME@
+#define	__repmgr_bcast_member_list __repmgr_bcast_member_list@DB_VERSION_UNIQUE_NAME@
 #define	__seq_stat __seq_stat@DB_VERSION_UNIQUE_NAME@
 #define	__seq_stat_print __seq_stat_print@DB_VERSION_UNIQUE_NAME@
 #define	__db_get_seq_flags_fn __db_get_seq_flags_fn@DB_VERSION_UNIQUE_NAME@
 #define	__db_get_seq_flags_fn __db_get_seq_flags_fn@DB_VERSION_UNIQUE_NAME@
+#define	__seq_open __seq_open@DB_VERSION_UNIQUE_NAME@
+#define	__seq_initial_value __seq_initial_value@DB_VERSION_UNIQUE_NAME@
+#define	__seq_get __seq_get@DB_VERSION_UNIQUE_NAME@
+#define	__seq_close __seq_close@DB_VERSION_UNIQUE_NAME@
 #define	bdb_HCommand bdb_HCommand@DB_VERSION_UNIQUE_NAME@
 #if DB_DBM_HSEARCH != 0
 #define	bdb_NdbmOpen bdb_NdbmOpen@DB_VERSION_UNIQUE_NAME@
@@ -2057,9 +2294,12 @@
 #define	tcl_CompactStat tcl_CompactStat@DB_VERSION_UNIQUE_NAME@
 #define	tcl_rep_send tcl_rep_send@DB_VERSION_UNIQUE_NAME@
 #define	dbc_Cmd dbc_Cmd@DB_VERSION_UNIQUE_NAME@
+#define	dbstream_Cmd dbstream_Cmd@DB_VERSION_UNIQUE_NAME@
 #define	env_Cmd env_Cmd@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvRemove tcl_EnvRemove@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvClose tcl_EnvClose@DB_VERSION_UNIQUE_NAME@
+#define	tcl_EnvBackup tcl_EnvBackup@DB_VERSION_UNIQUE_NAME@
+#define	tcl_EnvDbBackup tcl_EnvDbBackup@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvIdReset tcl_EnvIdReset@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvLsnReset tcl_EnvLsnReset@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvVerbose tcl_EnvVerbose@DB_VERSION_UNIQUE_NAME@
@@ -2069,6 +2309,7 @@
 #define	tcl_EnvGetEncryptFlags tcl_EnvGetEncryptFlags@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvSetErrfile tcl_EnvSetErrfile@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvSetMsgfile tcl_EnvSetMsgfile@DB_VERSION_UNIQUE_NAME@
+#define	tcl_EnvCloseMsgfile tcl_EnvCloseMsgfile@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvSetErrpfx tcl_EnvSetErrpfx@DB_VERSION_UNIQUE_NAME@
 #define	tcl_EnvStatPrint tcl_EnvStatPrint@DB_VERSION_UNIQUE_NAME@
 #define	_NewInfo _NewInfo@DB_VERSION_UNIQUE_NAME@
@@ -2111,9 +2352,11 @@
 #define	tcl_LogPut tcl_LogPut@DB_VERSION_UNIQUE_NAME@
 #define	tcl_LogStat tcl_LogStat@DB_VERSION_UNIQUE_NAME@
 #define	tcl_LogStatPrint tcl_LogStatPrint@DB_VERSION_UNIQUE_NAME@
+#define	tcl_LogVerify tcl_LogVerify@DB_VERSION_UNIQUE_NAME@
 #define	logc_Cmd logc_Cmd@DB_VERSION_UNIQUE_NAME@
 #define	tcl_LogConfig tcl_LogConfig@DB_VERSION_UNIQUE_NAME@
 #define	tcl_LogGetConfig tcl_LogGetConfig@DB_VERSION_UNIQUE_NAME@
+#define	tcl_LogSetMax tcl_LogSetMax@DB_VERSION_UNIQUE_NAME@
 #define	_MpInfoDelete _MpInfoDelete@DB_VERSION_UNIQUE_NAME@
 #define	tcl_MpSync tcl_MpSync@DB_VERSION_UNIQUE_NAME@
 #define	tcl_MpTrickle tcl_MpTrickle@DB_VERSION_UNIQUE_NAME@
@@ -2121,6 +2364,7 @@
 #define	tcl_MpStat tcl_MpStat@DB_VERSION_UNIQUE_NAME@
 #define	tcl_MpStatPrint tcl_MpStatPrint@DB_VERSION_UNIQUE_NAME@
 #define	tcl_Mutex tcl_Mutex@DB_VERSION_UNIQUE_NAME@
+#define	tcl_MutexFailchkTimeout tcl_MutexFailchkTimeout@DB_VERSION_UNIQUE_NAME@
 #define	tcl_MutFree tcl_MutFree@DB_VERSION_UNIQUE_NAME@
 #define	tcl_MutGet tcl_MutGet@DB_VERSION_UNIQUE_NAME@
 #define	tcl_MutLock tcl_MutLock@DB_VERSION_UNIQUE_NAME@
@@ -2227,6 +2471,7 @@
 #define	__txn_get_prepared __txn_get_prepared@DB_VERSION_UNIQUE_NAME@
 #define	__txn_openfiles __txn_openfiles@DB_VERSION_UNIQUE_NAME@
 #define	__txn_open __txn_open@DB_VERSION_UNIQUE_NAME@
+#define	__txn_region_detach __txn_region_detach@DB_VERSION_UNIQUE_NAME@
 #define	__txn_findlastckp __txn_findlastckp@DB_VERSION_UNIQUE_NAME@
 #define	__txn_env_refresh __txn_env_refresh@DB_VERSION_UNIQUE_NAME@
 #define	__txn_region_mutex_count __txn_region_mutex_count@DB_VERSION_UNIQUE_NAME@
@@ -2234,7 +2479,7 @@
 #define	__txn_region_size __txn_region_size@DB_VERSION_UNIQUE_NAME@
 #define	__txn_region_max __txn_region_max@DB_VERSION_UNIQUE_NAME@
 #define	__txn_id_set __txn_id_set@DB_VERSION_UNIQUE_NAME@
-#define	__txn_oldest_reader __txn_oldest_reader@DB_VERSION_UNIQUE_NAME@
+#define	__txn_get_readers __txn_get_readers@DB_VERSION_UNIQUE_NAME@
 #define	__txn_add_buffer __txn_add_buffer@DB_VERSION_UNIQUE_NAME@
 #define	__txn_remove_buffer __txn_remove_buffer@DB_VERSION_UNIQUE_NAME@
 #define	__txn_stat_pp __txn_stat_pp@DB_VERSION_UNIQUE_NAME@
diff --git a/src/dbinc_auto/lock_ext.h b/src/dbinc_auto/lock_ext.h
index d5981e18..3d2c37a3 100644
--- a/src/dbinc_auto/lock_ext.h
+++ b/src/dbinc_auto/lock_ext.h
@@ -28,10 +28,11 @@ int __lock_id_free_pp __P((DB_ENV *, u_int32_t));
 int  __lock_id_free __P((ENV *, DB_LOCKER *));
 int __lock_id_set __P((ENV *, u_int32_t, u_int32_t));
 int __lock_getlocker __P((DB_LOCKTAB *, u_int32_t, int, DB_LOCKER **));
-int __lock_getlocker_int __P((DB_LOCKTAB *, u_int32_t, int, DB_LOCKER **));
+int __lock_getlocker_int __P((DB_LOCKTAB *, u_int32_t, int, DB_THREAD_INFO *, DB_LOCKER **));
 int __lock_addfamilylocker __P((ENV *, u_int32_t, u_int32_t, u_int32_t));
 int __lock_freelocker  __P((DB_LOCKTAB *, DB_LOCKER *));
 int __lock_familyremove  __P((DB_LOCKTAB *, DB_LOCKER *));
+int __lock_local_locker_invalidate  __P((ENV *, db_mutex_t));
 int __lock_fix_list __P((ENV *, DBT *, u_int32_t));
 int __lock_get_list __P((ENV *, DB_LOCKER *, u_int32_t, db_lockmode_t, DBT *));
 void __lock_list_print __P((ENV *, DB_MSGBUF *, DBT *));
@@ -57,6 +58,7 @@ int __lock_get_env_timeout __P((DB_ENV *, db_timeout_t *, u_int32_t));
 int __lock_set_env_timeout __P((DB_ENV *, db_timeout_t, u_int32_t));
 int __lock_open __P((ENV *));
 int __lock_env_refresh __P((ENV *));
+int __lock_region_detach __P((ENV *, DB_LOCKTAB *));
 u_int32_t __lock_region_mutex_count __P((ENV *));
 u_int32_t __lock_region_mutex_max __P((ENV *));
 size_t __lock_region_max __P((ENV *));
@@ -65,6 +67,7 @@ int __lock_stat_pp __P((DB_ENV *, DB_LOCK_STAT **, u_int32_t));
 int __lock_stat_print_pp __P((DB_ENV *, u_int32_t));
 int  __lock_stat_print __P((ENV *, u_int32_t));
 void __lock_printlock __P((DB_LOCKTAB *, DB_MSGBUF *mbp, struct __db_lock *, int));
+int  __lock_dump_locker __P((ENV *, DB_MSGBUF *, DB_LOCKTAB *, DB_LOCKER *));
 int __lock_set_timeout __P((ENV *, DB_LOCKER *, db_timeout_t, u_int32_t));
 int __lock_set_timeout_internal __P((ENV *, DB_LOCKER *, db_timeout_t, u_int32_t));
 int __lock_inherit_timeout __P((ENV *, DB_LOCKER *, DB_LOCKER *));
diff --git a/src/dbinc_auto/log_ext.h b/src/dbinc_auto/log_ext.h
index dde6742d..769643fa 100644
--- a/src/dbinc_auto/log_ext.h
+++ b/src/dbinc_auto/log_ext.h
@@ -7,6 +7,7 @@ extern "C" {
 #endif
 
 int __log_open __P((ENV *));
+int __log_region_detach __P((ENV *, DB_LOG *));
 int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *));
 int __log_valid __P((DB_LOG *, u_int32_t, int, DB_FH **, u_int32_t, logfile_validity *, u_int32_t *));
 int __log_env_refresh __P((ENV *));
@@ -72,6 +73,7 @@ int __log_flush_int __P((DB_LOG *, const DB_LSN *, int));
 int __log_file_pp __P((DB_ENV *, const DB_LSN *, char *, size_t));
 int __log_name __P((DB_LOG *, u_int32_t, char **, DB_FH **, u_int32_t));
 int __log_rep_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t));
+int __log_rep_write __P((ENV *));
 int __log_put_record_pp __P((DB_ENV *, DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, ...));
 int __log_put_record __P((ENV *, DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, ...));
 int __log_stat_pp __P((DB_ENV *, DB_LOG_STAT **, u_int32_t));
@@ -115,6 +117,7 @@ int __db_relink_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __db_merge_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __db_pgno_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __dbreg_register_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __dbreg_register_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __bam_split_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __bam_split_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __bam_rsplit_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
@@ -129,12 +132,19 @@ int __bam_rcuradj_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __bam_relink_43_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __bam_merge_44_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_create_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_create_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_create_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_remove_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_remove_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_write_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_write_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_file_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_write_file_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_rename_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_rename_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __fop_file_remove_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __fop_file_remove_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __ham_insdel_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __ham_newpage_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
@@ -150,6 +160,7 @@ int __ham_contract_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __ham_curadj_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __ham_chgpg_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_addrem_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+int __heap_addrem_60_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_pg_alloc_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_trunc_meta_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __heap_trunc_page_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
diff --git a/src/dbinc_auto/mp_ext.h b/src/dbinc_auto/mp_ext.h
index d142b584..3f5a397b 100644
--- a/src/dbinc_auto/mp_ext.h
+++ b/src/dbinc_auto/mp_ext.h
@@ -6,6 +6,7 @@
 extern "C" {
 #endif
 
+int __memp_bh_unreachable __P((ENV *, BH *, DB_LSN *, int));
 int __memp_alloc __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, size_t, roff_t *, void *));
 void __memp_free __P((REGINFO *, void *));
 int __memp_backup_open __P((ENV *, DB_MPOOLFILE *, const char *, const char *, u_int32_t, DB_FH **, void**));
@@ -18,6 +19,7 @@ int __memp_pg __P((DB_MPOOLFILE *, db_pgno_t, void *, int));
 int __memp_bhfree __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t));
 int __memp_fget_pp __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *));
 int __memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, void *));
+int  __memp_find_obsolete_version __P((ENV *, BH *, DB_MPOOL_HASH *, BH **));
 int __memp_fcreate_pp __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t));
 int __memp_fcreate __P((ENV *, DB_MPOOLFILE **));
 int __memp_set_clear_len __P((DB_MPOOLFILE *, u_int32_t));
@@ -28,6 +30,7 @@ int __memp_set_flags __P((DB_MPOOLFILE *, u_int32_t, int));
 int __memp_get_ftype __P((DB_MPOOLFILE *, int *));
 int __memp_set_ftype __P((DB_MPOOLFILE *, int));
 int __memp_set_lsn_offset __P((DB_MPOOLFILE *, int32_t));
+void __memp_set_maxpgno __P((MPOOLFILE *, u_int32_t, u_int32_t));
 int __memp_get_pgcookie __P((DB_MPOOLFILE *, DBT *));
 int __memp_set_pgcookie __P((DB_MPOOLFILE *, DBT *));
 int __memp_get_priority __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY *));
@@ -75,10 +78,12 @@ int __memp_skip_curadj __P((DBC *, db_pgno_t));
 int __memp_bh_freeze __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *, BH *, int *));
 int __memp_bh_thaw __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *, BH *, BH *));
 int __memp_open __P((ENV *, int));
+int __memp_region_detach __P((ENV *, DB_MPOOL *));
 int	__memp_init __P((ENV *, DB_MPOOL *, u_int, u_int32_t, u_int));
 u_int32_t __memp_max_regions __P((ENV *));
 u_int32_t __memp_region_mutex_count __P((ENV *));
 int __memp_env_refresh __P((ENV *));
+int __memp_region_bhfree __P((REGINFO *));
 int __memp_register_pp __P((DB_ENV *, int, int (*)(DB_ENV *, db_pgno_t, void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *)));
 int __memp_register __P((ENV *, int, int (*)(DB_ENV *, db_pgno_t, void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *)));
 int __memp_get_bucket __P((ENV *, MPOOLFILE *, db_pgno_t, REGINFO **, DB_MPOOL_HASH **, u_int32_t *));
diff --git a/src/dbinc_auto/mutex_ext.h b/src/dbinc_auto/mutex_ext.h
index 1a2a1b2b..673c18d0 100644
--- a/src/dbinc_auto/mutex_ext.h
+++ b/src/dbinc_auto/mutex_ext.h
@@ -10,13 +10,13 @@ int __mutex_alloc __P((ENV *, int, u_int32_t, db_mutex_t *));
 int __mutex_alloc_int __P((ENV *, int, int, u_int32_t, db_mutex_t *));
 int __mutex_free __P((ENV *, db_mutex_t *));
 int __mutex_free_int __P((ENV *, int, db_mutex_t *));
+int __mutex_died __P((ENV *, db_mutex_t));
 int __mutex_refresh __P((ENV *, db_mutex_t));
-int __mut_failchk __P((ENV *));
-int __db_fcntl_mutex_init __P((ENV *, db_mutex_t, u_int32_t));
-int __db_fcntl_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
-int __db_fcntl_mutex_trylock __P((ENV *, db_mutex_t));
-int __db_fcntl_mutex_unlock __P((ENV *, db_mutex_t));
-int __db_fcntl_mutex_destroy __P((ENV *, db_mutex_t));
+int __mutex_record_lock __P((ENV *, db_mutex_t, MUTEX_ACTION, MUTEX_STATE **));
+int __mutex_record_unlock __P((ENV *, db_mutex_t));
+int __mutex_record_print __P((ENV *, DB_THREAD_INFO *));
+int __mutex_failchk __P((ENV *));
+int __mutex_failchk_thread __P((ENV *, DB_THREAD_INFO *));
 int __mutex_alloc_pp __P((DB_ENV *, u_int32_t, db_mutex_t *));
 int __mutex_free_pp __P((DB_ENV *, db_mutex_t));
 int __mutex_lock_pp __P((DB_ENV *, db_mutex_t));
@@ -31,6 +31,9 @@ int __mutex_get_max __P((DB_ENV *, u_int32_t *));
 int __mutex_set_max __P((DB_ENV *, u_int32_t));
 int __mutex_get_tas_spins __P((DB_ENV *, u_int32_t *));
 int __mutex_set_tas_spins __P((DB_ENV *, u_int32_t));
+#ifdef HAVE_ERROR_HISTORY
+int __mutex_diags __P((ENV *, db_mutex_t, int));
+#endif
 #if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT)
 atomic_value_t __atomic_inc __P((ENV *, db_atomic_t *));
 #endif
@@ -53,6 +56,7 @@ int __db_hybrid_mutex_suspend __P((ENV *, db_mutex_t, db_timespec *, int));
 int __db_pthread_mutex_unlock __P((ENV *, db_mutex_t));
 int __db_pthread_mutex_destroy __P((ENV *, db_mutex_t));
 int __mutex_open __P((ENV *, int));
+int __mutex_region_detach __P((ENV *, DB_MUTEXMGR *));
 int __mutex_env_refresh __P((ENV *));
 void __mutex_resource_return __P((ENV *, REGINFO *));
 int __mutex_stat_pp __P((DB_ENV *, DB_MUTEX_STAT **, u_int32_t));
@@ -62,6 +66,7 @@ void __mutex_print_debug_single __P((ENV *, const char *, db_mutex_t, u_int32_t)
 void __mutex_print_debug_stats __P((ENV *, DB_MSGBUF *, db_mutex_t, u_int32_t));
 void __mutex_set_wait_info __P((ENV *, db_mutex_t, uintmax_t *, uintmax_t *));
 void __mutex_clear __P((ENV *, db_mutex_t));
+char *__mutex_describe __P((ENV *, db_mutex_t, char *));
 int __db_tas_mutex_init __P((ENV *, db_mutex_t, u_int32_t));
 int __db_tas_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
 int __db_tas_mutex_trylock __P((ENV *, db_mutex_t));
diff --git a/src/dbinc_auto/os_ext.h b/src/dbinc_auto/os_ext.h
index a0a7b791..26cf2127 100644
--- a/src/dbinc_auto/os_ext.h
+++ b/src/dbinc_auto/os_ext.h
@@ -6,7 +6,7 @@
 extern "C" {
 #endif
 
-void __os_abort __P((ENV *));
+void __os_abort __P((const ENV *));
 int __os_abspath __P((const char *));
 #if defined(HAVE_REPLICATION_THREADS)
 int __os_getaddrinfo __P((ENV *, const char *, u_int, const char *, const ADDRINFO *, ADDRINFO **));
@@ -18,12 +18,12 @@ int __os_umalloc __P((ENV *, size_t, void *));
 int __os_urealloc __P((ENV *, size_t, void *));
 void __os_ufree __P((ENV *, void *));
 int __os_strdup __P((ENV *, const char *, void *));
-int __os_calloc __P((ENV *, size_t, size_t, void *));
-int __os_malloc __P((ENV *, size_t, void *));
-int __os_realloc __P((ENV *, size_t, void *));
-void __os_free __P((ENV *, void *));
+int __os_calloc __P((const ENV *, size_t, size_t, void *));
+int __os_malloc __P((const ENV *, size_t, void *));
+int __os_realloc __P((const ENV *, size_t, void *));
+void __os_free __P((const ENV *, void *));
 void *__ua_memcpy __P((void *, const void *, size_t));
-void __os_gettime __P((ENV *, db_timespec *, int));
+void __os_gettime __P((const ENV *, db_timespec *, int));
 int __os_fs_notzero __P((void));
 int __os_support_direct_io __P((void));
 int __os_support_db_register __P((void));
@@ -54,6 +54,7 @@ int __os_open __P((ENV *, const char *, u_int32_t, u_int32_t, int, DB_FH **));
 int __os_concat_path __P((char *, size_t, const char *, const char *));
 void __os_id __P((DB_ENV *, pid_t *, db_threadid_t*));
 int __os_rename __P((ENV *, const char *, const char *, u_int32_t));
+int __os_rmdir __P((ENV *, const char *));
 int __os_isroot __P((void));
 char *__db_rpath __P((const char *));
 int __os_io __P((ENV *, int, DB_FH *, db_pgno_t, u_int32_t, u_int32_t, u_int32_t, u_int8_t *, size_t *));
@@ -61,17 +62,38 @@ int __os_read __P((ENV *, DB_FH *, void *, size_t, size_t *));
 int __os_write __P((ENV *, DB_FH *, void *, size_t, size_t *));
 int __os_physwrite __P((ENV *, DB_FH *, void *, size_t, size_t *));
 int __os_seek __P((ENV *, DB_FH *, db_pgno_t, u_int32_t, off_t));
-void __os_stack __P((ENV *));
+void __os_stack __P((const ENV *));
+void __os_stack_top __P((const ENV *, unsigned, unsigned));
+void __os_stack_text __P((const ENV *, char *, size_t, unsigned, unsigned));
+int __os_stack_save __P((const ENV *, unsigned, void **));
+void __os_stack_msgadd __P((const ENV *, DB_MSGBUF *, unsigned, unsigned, void **));
 int __os_exists __P((ENV *, const char *, int *));
 int __os_ioinfo __P((ENV *, const char *, DB_FH *, u_int32_t *, u_int32_t *, u_int32_t *));
 int __os_tmpdir __P((ENV *, u_int32_t));
-int __os_truncate __P((ENV *, DB_FH *, db_pgno_t, u_int32_t));
+int __os_truncate __P((ENV *, DB_FH *, db_pgno_t, u_int32_t, off_t));
 void __os_unique_id __P((ENV *, u_int32_t *));
+void __os_srandom __P((u_int));
+u_int __os_random __P((void));
 int __os_unlink __P((ENV *, const char *, int));
 void __os_yield __P((ENV *, u_long, u_long));
 #ifdef HAVE_QNX
 int __os_qnx_region_open __P((ENV *, const char *, int, int, DB_FH **));
 #endif
+#ifdef DB_WINCE
+FILE * __ce_freopen __P((const char *, const char *, FILE *));
+#endif
+#ifdef DB_WINCE
+struct tm * __ce_gmtime __P((const time_t *));
+#endif
+#ifdef DB_WINCE
+struct tm * localtime __P((const time_t *));
+#endif
+#ifdef DB_WINCE
+time_t __ce_mktime __P((struct tm *));
+#endif
+#ifdef DB_WINCE
+int __ce_remove __P((const char *path));
+#endif
 int __os_is_winnt __P((void));
 u_int32_t __os_cpu_count __P((void));
 #ifdef HAVE_REPLICATION_THREADS
diff --git a/src/dbinc_auto/rep_automsg.h b/src/dbinc_auto/rep_automsg.h
index 584040cf..f52c8907 100644
--- a/src/dbinc_auto/rep_automsg.h
+++ b/src/dbinc_auto/rep_automsg.h
@@ -32,7 +32,7 @@ typedef struct ___rep_egen_args {
 	u_int32_t	egen;
 } __rep_egen_args;
 
-#define	__REP_FILEINFO_SIZE	40
+#define	__REP_FILEINFO_SIZE	48
 typedef struct ___rep_fileinfo_args {
 	u_int32_t	pgsize;
 	db_pgno_t	pgno;
@@ -44,8 +44,24 @@ typedef struct ___rep_fileinfo_args {
 	DBT		uid;
 	DBT		info;
 	DBT		dir;
+	u_int32_t	blob_fid_lo;
+	u_int32_t	blob_fid_hi;
 } __rep_fileinfo_args;
 
+#define	__REP_FILEINFO_V7_SIZE	40	/* Same value __REP_FILEINFO_SIZE had before this change (40). */
+typedef struct ___rep_fileinfo_v7_args {	/* Ends at dir: has no blob_fid_lo/blob_fid_hi, unlike __rep_fileinfo_args. */
+	u_int32_t	pgsize;
+	db_pgno_t	pgno;
+	db_pgno_t	max_pgno;
+	u_int32_t	filenum;
+	u_int32_t	finfo_flags;
+	u_int32_t	type;
+	u_int32_t	db_flags;
+	DBT		uid;
+	DBT		info;
+	DBT		dir;
+} __rep_fileinfo_v7_args;	/* NOTE(review): presumably kept for peers using the older message layout -- confirm. */
+
 #define	__REP_FILEINFO_V6_SIZE	36
 typedef struct ___rep_fileinfo_v6_args {
 	u_int32_t	pgsize;
@@ -116,5 +132,46 @@ typedef struct ___rep_lsn_hist_data_args {
 	u_int32_t	hist_nsec;
 } __rep_lsn_hist_data_args;
 
-#define	__REP_MAXMSG_SIZE	40
+#define	__REP_BLOB_UPDATE_REQ_SIZE	32	/* 4 x 8-byte u_int64_t fields. */
+typedef struct ___rep_blob_update_req_args {
+	u_int64_t	blob_fid;
+	u_int64_t	blob_sid;
+	u_int64_t	blob_id;
+	u_int64_t	highest_id;
+} __rep_blob_update_req_args;
+
+#define	__REP_BLOB_UPDATE_SIZE	24	/* 2 x u_int64_t + 2 x u_int32_t. */
+typedef struct ___rep_blob_update_args {
+	u_int64_t	blob_fid;
+	u_int64_t	highest_id;
+	u_int32_t	flags;
+	u_int32_t	num_blobs;
+} __rep_blob_update_args;
+
+#define	__REP_BLOB_FILE_SIZE	24	/* 3 x 8-byte u_int64_t fields. */
+typedef struct ___rep_blob_file_args {
+	u_int64_t	blob_sid;
+	u_int64_t	blob_id;
+	u_int64_t	blob_size;
+} __rep_blob_file_args;
+
+#define	__REP_BLOB_CHUNK_SIZE	40	/* 4 + 4 x 8 = 36 for the integer fields; the other 4 is presumably the DBT's length prefix -- confirm. */
+typedef struct ___rep_blob_chunk_args {
+	u_int32_t	flags;
+	u_int64_t	blob_fid;
+	u_int64_t	blob_sid;
+	u_int64_t	blob_id;
+	u_int64_t	offset;
+	DBT		data;	/* Variable-length member; its payload is not part of the fixed size above (assumption -- verify in the marshal code). */
+} __rep_blob_chunk_args;
+
+#define	__REP_BLOB_CHUNK_REQ_SIZE	32	/* 4 x 8-byte u_int64_t fields. */
+typedef struct ___rep_blob_chunk_req_args {
+	u_int64_t	blob_fid;
+	u_int64_t	blob_sid;
+	u_int64_t	blob_id;
+	u_int64_t	offset;
+} __rep_blob_chunk_req_args;
+
+#define	__REP_MAXMSG_SIZE	48
 #endif
diff --git a/src/dbinc_auto/rep_ext.h b/src/dbinc_auto/rep_ext.h
index 89bdc797..97740acf 100644
--- a/src/dbinc_auto/rep_ext.h
+++ b/src/dbinc_auto/rep_ext.h
@@ -14,6 +14,8 @@ int __rep_egen_marshal __P((ENV *, __rep_egen_args *, u_int8_t *, size_t, size_t
 int __rep_egen_unmarshal __P((ENV *, __rep_egen_args *, u_int8_t *, size_t, u_int8_t **));
 int __rep_fileinfo_marshal __P((ENV *, u_int32_t, __rep_fileinfo_args *, u_int8_t *, size_t, size_t *));
 int __rep_fileinfo_unmarshal __P((ENV *, u_int32_t, __rep_fileinfo_args **, u_int8_t *, size_t, u_int8_t **));
+int __rep_fileinfo_v7_marshal __P((ENV *, u_int32_t, __rep_fileinfo_v7_args *, u_int8_t *, size_t, size_t *));
+int __rep_fileinfo_v7_unmarshal __P((ENV *, u_int32_t, __rep_fileinfo_v7_args **, u_int8_t *, size_t, u_int8_t **));
 int __rep_fileinfo_v6_marshal __P((ENV *, u_int32_t, __rep_fileinfo_v6_args *, u_int8_t *, size_t, size_t *));
 int __rep_fileinfo_v6_unmarshal __P((ENV *, u_int32_t, __rep_fileinfo_v6_args **, u_int8_t *, size_t, u_int8_t **));
 int __rep_grant_info_marshal __P((ENV *, __rep_grant_info_args *, u_int8_t *, size_t, size_t *));
@@ -32,13 +34,29 @@ void __rep_lsn_hist_key_marshal __P((ENV *, __rep_lsn_hist_key_args *, u_int8_t
 int __rep_lsn_hist_key_unmarshal __P((ENV *, __rep_lsn_hist_key_args *, u_int8_t *, size_t, u_int8_t **));
 void __rep_lsn_hist_data_marshal __P((ENV *, __rep_lsn_hist_data_args *, u_int8_t *));
 int __rep_lsn_hist_data_unmarshal __P((ENV *, __rep_lsn_hist_data_args *, u_int8_t *, size_t, u_int8_t **));
+void __rep_blob_update_req_marshal __P((ENV *, __rep_blob_update_req_args *, u_int8_t *));
+int __rep_blob_update_req_unmarshal __P((ENV *, __rep_blob_update_req_args *, u_int8_t *, size_t, u_int8_t **));
+void __rep_blob_update_marshal __P((ENV *, __rep_blob_update_args *, u_int8_t *));
+int __rep_blob_update_unmarshal __P((ENV *, __rep_blob_update_args *, u_int8_t *, size_t, u_int8_t **));
+void __rep_blob_file_marshal __P((ENV *, __rep_blob_file_args *, u_int8_t *));
+int __rep_blob_file_unmarshal __P((ENV *, __rep_blob_file_args *, u_int8_t *, size_t, u_int8_t **));
+void __rep_blob_chunk_marshal __P((ENV *, __rep_blob_chunk_args *, u_int8_t *));
+int __rep_blob_chunk_unmarshal __P((ENV *, __rep_blob_chunk_args *, u_int8_t *, size_t, u_int8_t **));
+void __rep_blob_chunk_req_marshal __P((ENV *, __rep_blob_chunk_req_args *, u_int8_t *));
+int __rep_blob_chunk_req_unmarshal __P((ENV *, __rep_blob_chunk_req_args *, u_int8_t *, size_t, u_int8_t **));
 int __rep_update_req __P((ENV *, __rep_control_args *));
+int __rep_blob_update_req __P((ENV *, DB_THREAD_INFO *, DBT *));
 int __rep_page_req __P((ENV *, DB_THREAD_INFO *, int, __rep_control_args *, DBT *));
 int __rep_update_setup __P((ENV *, int, __rep_control_args *, DBT *, time_t, DB_LSN *));
+int __rep_blob_update __P((ENV *, int, DB_THREAD_INFO *, DBT *));
+int __rep_blob_allreq __P((ENV *, int, DBT *));
 int __rep_bulk_page __P((ENV *, DB_THREAD_INFO *, int, __rep_control_args *, DBT *));
 int __rep_page __P((ENV *, DB_THREAD_INFO *, int, __rep_control_args *, DBT *));
+int __rep_blob_chunk __P((ENV *, int, DB_THREAD_INFO *, DBT *));
 int __rep_init_cleanup __P((ENV *, REP *, int));
+int __rep_blob_chunk_req __P((ENV *, int, DBT *));
 int __rep_pggap_req __P((ENV *, REP *, __rep_fileinfo_args *, u_int32_t));
+int __rep_blob_rereq __P((ENV *, REP *));
 int __rep_finfo_alloc __P((ENV *, __rep_fileinfo_args *, __rep_fileinfo_args **));
 int __rep_remove_init_file __P((ENV *));
 int __rep_reset_init __P((ENV *));
@@ -65,27 +83,35 @@ void __rep_env_destroy __P((DB_ENV *));
 int __rep_get_config __P((DB_ENV *, u_int32_t, int *));
 int __rep_set_config __P((DB_ENV *, u_int32_t, int));
 int __rep_start_pp __P((DB_ENV *, DBT *, u_int32_t));
-int __rep_start_int __P((ENV *, DBT *, u_int32_t));
+int __rep_start_int __P((ENV *, DBT *, u_int32_t, u_int32_t));
 int __rep_open_sysdb __P((ENV *, DB_THREAD_INFO *, DB_TXN *, const char *, u_int32_t, DB **));
 int __rep_client_dbinit __P((ENV *, int, repdb_t));
+int  __rep_blob_cmp __P((DB *, const DBT *, const DBT *, size_t *));
+int  __rep_offset_cmp __P((DB *, const DBT *, const DBT *, size_t *));
 int __rep_get_limit __P((DB_ENV *, u_int32_t *, u_int32_t *));
 int __rep_set_limit __P((DB_ENV *, u_int32_t, u_int32_t));
 int __rep_set_nsites_pp __P((DB_ENV *, u_int32_t));
 int __rep_set_nsites_int __P((ENV *, u_int32_t));
 int __rep_get_nsites __P((DB_ENV *, u_int32_t *));
-int __rep_set_priority __P((DB_ENV *, u_int32_t));
+int __rep_set_priority_pp __P((DB_ENV *, u_int32_t));
+int __rep_set_priority_int __P((ENV *, u_int32_t));
 int __rep_get_priority __P((DB_ENV *, u_int32_t *));
-int __rep_set_timeout __P((DB_ENV *, int, db_timeout_t));
+int __rep_set_timeout_pp __P((DB_ENV *, int, db_timeout_t));
+int __rep_set_timeout_int __P((ENV *, int, db_timeout_t));
 int __rep_get_timeout __P((DB_ENV *, int, db_timeout_t *));
 int __rep_get_request __P((DB_ENV *, db_timeout_t *, db_timeout_t *));
 int __rep_set_request __P((DB_ENV *, db_timeout_t, db_timeout_t));
+int __rep_set_view __P((DB_ENV *, int (*)(DB_ENV *, const char *, int *, u_int32_t)));
+int __rep_call_partial __P((ENV *, const char *, int *, u_int32_t, DELAYED_BLOB_LIST **));
 int __rep_set_transport_pp __P((DB_ENV *, int, int (*)(DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)));
 int __rep_set_transport_int __P((ENV *, int, int (*)(DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)));
 int __rep_get_clockskew __P((DB_ENV *, u_int32_t *, u_int32_t *));
 int __rep_set_clockskew __P((DB_ENV *, u_int32_t, u_int32_t));
-int __rep_flush __P((DB_ENV *));
+int __rep_flush_pp __P((DB_ENV *));
+int __rep_flush_int __P((ENV *));
 int __rep_sync __P((DB_ENV *, u_int32_t));
 int __rep_txn_applied __P((ENV *, DB_THREAD_INFO *, DB_COMMIT_INFO *, db_timeout_t));
+int __rep_read_lsn_history __P((ENV *, DB_THREAD_INFO *, DB_TXN **, DBC **, u_int32_t, __rep_lsn_hist_data_args *, struct rep_waitgoal *, u_int32_t, int));
 int __rep_process_message_pp __P((DB_ENV *, DBT *, DBT *, int, DB_LSN *));
 int __rep_process_message_int __P((ENV *, DBT *, DBT *, int, DB_LSN *));
 int __rep_apply __P((ENV *, DB_THREAD_INFO *, __rep_control_args *, DBT *, DB_LSN *, int *, DB_LSN *));
@@ -101,6 +127,7 @@ int __rep_preclose __P((ENV *));
 int __rep_closefiles __P((ENV *));
 int __rep_write_egen __P((ENV *, REP *, u_int32_t));
 int __rep_write_gen __P((ENV *, REP *, u_int32_t));
+int __rep_check_view __P((ENV *));
 int __rep_stat_pp __P((DB_ENV *, DB_REP_STAT **, u_int32_t));
 int __rep_stat_print_pp __P((DB_ENV *, u_int32_t));
 int __rep_stat_print __P((ENV *, u_int32_t));
@@ -139,6 +166,8 @@ int __rep_log_backup __P((ENV *, DB_LOGC *, DB_LSN *, u_int32_t));
 int __rep_get_maxpermlsn __P((ENV *, DB_LSN *));
 int __rep_is_internal_rep_file __P((char *));
 int __rep_get_datagen __P((ENV *, u_int32_t *));
+int __rep_become_readonly_master __P((ENV *, u_int32_t *, DB_LSN *));
+int __rep_get_lsnhist_data __P((ENV *, DB_THREAD_INFO *, u_int32_t, __rep_lsn_hist_data_args *));
 int __rep_verify __P((ENV *, __rep_control_args *, DBT *, int, time_t));
 int __rep_verify_fail __P((ENV *, __rep_control_args *));
 int __rep_verify_req __P((ENV *, __rep_control_args *, int));
diff --git a/src/dbinc_auto/repmgr_automsg.h b/src/dbinc_auto/repmgr_automsg.h
index 1b2b928c..17e467e9 100644
--- a/src/dbinc_auto/repmgr_automsg.h
+++ b/src/dbinc_auto/repmgr_automsg.h
@@ -72,11 +72,17 @@ typedef struct ___repmgr_membership_key_args {
 	u_int16_t	port;
 } __repmgr_membership_key_args;
 
-#define	__REPMGR_MEMBERSHIP_DATA_SIZE	4
+#define	__REPMGR_MEMBERSHIP_DATA_SIZE	8
 typedef struct ___repmgr_membership_data_args {
+	u_int32_t	status;
 	u_int32_t	flags;
 } __repmgr_membership_data_args;
 
+#define	__REPMGR_V4MEMBERSHIP_DATA_SIZE	4	/* Same value __REPMGR_MEMBERSHIP_DATA_SIZE had before this change (4). */
+typedef struct ___repmgr_v4membership_data_args {	/* flags only: no status field, unlike __repmgr_membership_data_args. */
+	u_int32_t	flags;
+} __repmgr_v4membership_data_args;
+
 #define	__REPMGR_MEMBER_METADATA_SIZE	8
 typedef struct ___repmgr_member_metadata_args {
 	u_int32_t	format;
@@ -96,18 +102,41 @@ typedef struct ___repmgr_membr_vers_args {
 	u_int32_t	gen;
 } __repmgr_membr_vers_args;
 
-#define	__REPMGR_SITE_INFO_SIZE	10
+#define	__REPMGR_SITE_INFO_SIZE	14
 typedef struct ___repmgr_site_info_args {
 	DBT		host;
 	u_int16_t	port;
+	u_int32_t	status;
 	u_int32_t	flags;
 } __repmgr_site_info_args;
 
-#define	__REPMGR_CONNECT_REJECT_SIZE	8
+#define	__REPMGR_V4SITE_INFO_SIZE	10	/* Same value __REPMGR_SITE_INFO_SIZE had before this change (10). */
+typedef struct ___repmgr_v4site_info_args {	/* No status field, unlike __repmgr_site_info_args. */
+	DBT		host;
+	u_int16_t	port;
+	u_int32_t	flags;
+} __repmgr_v4site_info_args;
+
+#define	__REPMGR_CONNECT_REJECT_SIZE	12
 typedef struct ___repmgr_connect_reject_args {
 	u_int32_t	version;
 	u_int32_t	gen;
+	u_int32_t	status;
 } __repmgr_connect_reject_args;
 
-#define	__REPMGR_MAXMSG_SIZE	12
+#define	__REPMGR_V4CONNECT_REJECT_SIZE	8	/* Same value __REPMGR_CONNECT_REJECT_SIZE had before this change (8). */
+typedef struct ___repmgr_v4connect_reject_args {	/* No status field, unlike __repmgr_connect_reject_args. */
+	u_int32_t	version;
+	u_int32_t	gen;
+} __repmgr_v4connect_reject_args;
+
+#define	__REPMGR_LSNHIST_MATCH_SIZE	24	/* 2 x u_int32_t = 8; the remaining 16 is presumably 2 x 8-byte DB_LSN -- confirm. */
+typedef struct ___repmgr_lsnhist_match_args {
+	DB_LSN		lsn;
+	u_int32_t	hist_sec;
+	u_int32_t	hist_nsec;
+	DB_LSN		next_gen_lsn;
+} __repmgr_lsnhist_match_args;
+
+#define	__REPMGR_MAXMSG_SIZE	24
 #endif
diff --git a/src/dbinc_auto/repmgr_ext.h b/src/dbinc_auto/repmgr_ext.h
index b1237950..3ff59ffe 100644
--- a/src/dbinc_auto/repmgr_ext.h
+++ b/src/dbinc_auto/repmgr_ext.h
@@ -29,6 +29,8 @@ int __repmgr_membership_key_marshal __P((ENV *, __repmgr_membership_key_args *,
 int __repmgr_membership_key_unmarshal __P((ENV *, __repmgr_membership_key_args *, u_int8_t *, size_t, u_int8_t **));
 void __repmgr_membership_data_marshal __P((ENV *, __repmgr_membership_data_args *, u_int8_t *));
 int __repmgr_membership_data_unmarshal __P((ENV *, __repmgr_membership_data_args *, u_int8_t *, size_t, u_int8_t **));
+void __repmgr_v4membership_data_marshal __P((ENV *, __repmgr_v4membership_data_args *, u_int8_t *));
+int __repmgr_v4membership_data_unmarshal __P((ENV *, __repmgr_v4membership_data_args *, u_int8_t *, size_t, u_int8_t **));
 void __repmgr_member_metadata_marshal __P((ENV *, __repmgr_member_metadata_args *, u_int8_t *));
 int __repmgr_member_metadata_unmarshal __P((ENV *, __repmgr_member_metadata_args *, u_int8_t *, size_t, u_int8_t **));
 int __repmgr_gm_fwd_marshal __P((ENV *, __repmgr_gm_fwd_args *, u_int8_t *, size_t, size_t *));
@@ -37,21 +39,34 @@ void __repmgr_membr_vers_marshal __P((ENV *, __repmgr_membr_vers_args *, u_int8_
 int __repmgr_membr_vers_unmarshal __P((ENV *, __repmgr_membr_vers_args *, u_int8_t *, size_t, u_int8_t **));
 int __repmgr_site_info_marshal __P((ENV *, __repmgr_site_info_args *, u_int8_t *, size_t, size_t *));
 int __repmgr_site_info_unmarshal __P((ENV *, __repmgr_site_info_args *, u_int8_t *, size_t, u_int8_t **));
+int __repmgr_v4site_info_marshal __P((ENV *, __repmgr_v4site_info_args *, u_int8_t *, size_t, size_t *));
+int __repmgr_v4site_info_unmarshal __P((ENV *, __repmgr_v4site_info_args *, u_int8_t *, size_t, u_int8_t **));
 void __repmgr_connect_reject_marshal __P((ENV *, __repmgr_connect_reject_args *, u_int8_t *));
 int __repmgr_connect_reject_unmarshal __P((ENV *, __repmgr_connect_reject_args *, u_int8_t *, size_t, u_int8_t **));
+void __repmgr_v4connect_reject_marshal __P((ENV *, __repmgr_v4connect_reject_args *, u_int8_t *));
+int __repmgr_v4connect_reject_unmarshal __P((ENV *, __repmgr_v4connect_reject_args *, u_int8_t *, size_t, u_int8_t **));
+void __repmgr_lsnhist_match_marshal __P((ENV *, __repmgr_lsnhist_match_args *, u_int8_t *));
+int __repmgr_lsnhist_match_unmarshal __P((ENV *, __repmgr_lsnhist_match_args *, u_int8_t *, size_t, u_int8_t **));
 int __repmgr_member_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 int __repmgr_init_print __P((ENV *, DB_DISTAB *));
 int __repmgr_init_election __P((ENV *, u_int32_t));
 int __repmgr_claim_victory __P((ENV *));
 int __repmgr_turn_on_elections __P((ENV *));
-int __repmgr_start __P((DB_ENV *, int, u_int32_t));
+int __repmgr_start_pp __P((DB_ENV *, int, u_int32_t));
+int __repmgr_start_int __P((ENV *, int, u_int32_t));
 int __repmgr_valid_config __P((ENV *, u_int32_t));
+int __repmgr_prefmas_auto_config __P((DB_ENV *, u_int32_t *));
 int __repmgr_autostart __P((ENV *));
 int __repmgr_start_selector __P((ENV *));
 int __repmgr_close __P((ENV *));
 int __repmgr_stop __P((ENV *));
 int __repmgr_set_ack_policy __P((DB_ENV *, int));
 int __repmgr_get_ack_policy __P((DB_ENV *, int *));
+int __repmgr_set_incoming_queue_max __P((DB_ENV *, u_int32_t, u_int32_t));
+int __repmgr_get_incoming_queue_max __P((DB_ENV *, u_int32_t *, u_int32_t *));
+void __repmgr_set_incoming_queue_redzone __P((void *, u_int32_t, u_int32_t));
+int __repmgr_get_incoming_queue_redzone __P((DB_ENV *, u_int32_t *, u_int32_t *));
+int __repmgr_get_incoming_queue_fullevent __P((DB_ENV *, int *));
 int __repmgr_env_create __P((ENV *, DB_REP *));
 void __repmgr_env_destroy __P((ENV *, DB_REP *));
 int __repmgr_stop_threads __P((ENV *));
@@ -72,12 +87,13 @@ int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **));
 int __repmgr_get_site_address __P((DB_SITE *, const char **, u_int *));
 int __repmgr_get_eid __P((DB_SITE *, int *));
 int __repmgr_get_config __P((DB_SITE *, u_int32_t, u_int32_t *));
-int __repmgr_site_config __P((DB_SITE *, u_int32_t, u_int32_t));
+int __repmgr_site_config_pp __P((DB_SITE *, u_int32_t, u_int32_t));
+int __repmgr_site_config_int __P((DB_SITE *, u_int32_t, u_int32_t));
 int __repmgr_site_close __P((DB_SITE *));
 void *__repmgr_msg_thread __P((void *));
 int __repmgr_send_err_resp __P((ENV *, CHANNEL *, int));
 int __repmgr_handle_event __P((ENV *, u_int32_t, void *));
-int __repmgr_update_membership __P((ENV *, DB_THREAD_INFO *, int, u_int32_t));
+int __repmgr_update_membership __P((ENV *, DB_THREAD_INFO *, int, u_int32_t, u_int32_t));
 int __repmgr_set_gm_version __P((ENV *, DB_THREAD_INFO *, DB_TXN *, u_int32_t));
 int __repmgr_setup_gmdb_op __P((ENV *, DB_THREAD_INFO *, DB_TXN **, u_int32_t));
 int __repmgr_cleanup_gmdb_op __P((ENV *, int));
@@ -132,7 +148,6 @@ int __repmgr_select_loop __P((ENV *));
 int __repmgr_queue_destroy __P((ENV *));
 int __repmgr_queue_get __P((ENV *, REPMGR_MESSAGE **, REPMGR_RUNNABLE *));
 int __repmgr_queue_put __P((ENV *, REPMGR_MESSAGE *));
-int __repmgr_queue_size __P((ENV *));
 int __repmgr_member_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 void *__repmgr_select_thread __P((void *));
 int __repmgr_bow_out __P((ENV *));
@@ -140,6 +155,7 @@ int __repmgr_accept __P((ENV *));
 int __repmgr_compute_timeout __P((ENV *, db_timespec *));
 REPMGR_SITE *__repmgr_connected_master __P((ENV *));
 int __repmgr_check_timeouts __P((ENV *));
+int __repmgr_refresh_selector __P((ENV *));
 int __repmgr_first_try_connections __P((ENV *));
 int __repmgr_send_v1_handshake __P((ENV *, REPMGR_CONNECTION *, void *, size_t));
 int __repmgr_read_from_site __P((ENV *, REPMGR_CONNECTION *));
@@ -151,7 +167,8 @@ int __repmgr_write_some __P((ENV *, REPMGR_CONNECTION *));
 int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t));
 int __repmgr_stat_print_pp __P((DB_ENV *, u_int32_t));
 int __repmgr_stat_print __P((ENV *, u_int32_t));
-int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+int __repmgr_site_list_pp __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+int __repmgr_site_list_int __P((ENV *, u_int *, DB_REPMGR_SITE **));
 #ifndef HAVE_REPLICATION_THREADS
 int __repmgr_close __P((ENV *));
 #endif
@@ -162,6 +179,18 @@ int __repmgr_get_ack_policy __P((DB_ENV *, int *));
 int __repmgr_set_ack_policy __P((DB_ENV *, int));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
+int __repmgr_get_incoming_queue_max __P((DB_ENV *, u_int32_t *, u_int32_t *));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+int __repmgr_set_incoming_queue_max __P((DB_ENV *, u_int32_t, u_int32_t));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+int __repmgr_get_incoming_queue_redzone __P((DB_ENV *, u_int32_t *, u_int32_t *));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
+int __repmgr_get_incoming_queue_fullevent __P((DB_ENV *, int *));
+#endif
+#ifndef HAVE_REPLICATION_THREADS
 int __repmgr_site __P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
@@ -171,10 +200,10 @@ int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **));
 int __repmgr_local_site __P((DB_ENV *, DB_SITE **));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+int __repmgr_site_list_pp __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
-int __repmgr_start __P((DB_ENV *, int, u_int32_t));
+int __repmgr_start_pp __P((DB_ENV *, int, u_int32_t));
 #endif
 #ifndef HAVE_REPLICATION_THREADS
 int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t));
@@ -213,8 +242,8 @@ int __repmgr_thread_failure __P((ENV *, int));
 char *__repmgr_format_eid_loc __P((DB_REP *, REPMGR_CONNECTION *, char *));
 char *__repmgr_format_site_loc __P((REPMGR_SITE *, char *));
 char *__repmgr_format_addr_loc __P((repmgr_netaddr_t *, char *));
-int __repmgr_repstart __P((ENV *, u_int32_t));
-int __repmgr_become_master __P((ENV *));
+int __repmgr_repstart __P((ENV *, u_int32_t, u_int32_t));
+int __repmgr_become_master __P((ENV *, u_int32_t));
 int __repmgr_each_connection __P((ENV *, CONNECTION_ACTION, void *, int));
 int __repmgr_open __P((ENV *, void *));
 int __repmgr_join __P((ENV *, void *));
@@ -225,9 +254,16 @@ int __repmgr_init_new_sites __P((ENV *, int, int));
 int __repmgr_failchk __P((ENV *));
 int __repmgr_master_is_known __P((ENV *));
 int __repmgr_stable_lsn __P((ENV *, DB_LSN *));
+int __repmgr_make_request_conn __P((ENV *, repmgr_netaddr_t *, REPMGR_CONNECTION **));
 int __repmgr_send_sync_msg __P((ENV *, REPMGR_CONNECTION *, u_int32_t, u_int8_t *, u_int32_t));
-int __repmgr_marshal_member_list __P((ENV *, u_int8_t **, size_t *));
-int __repmgr_refresh_membership __P((ENV *, u_int8_t *, size_t));
+int __repmgr_read_own_msg __P((ENV *, REPMGR_CONNECTION *, u_int32_t *, u_int8_t **, size_t *));
+int __repmgr_prefmas_connected __P((ENV *));
+int __repmgr_restart_site_as_client __P((ENV *, int));
+int __repmgr_make_site_readonly_master __P((ENV *, int, u_int32_t *, DB_LSN *));
+int __repmgr_lsnhist_match __P((ENV *, DB_THREAD_INFO *, int, int *));
+int __repmgr_prefmas_get_wait __P((ENV *, u_int32_t *, u_long *));
+int __repmgr_marshal_member_list __P((ENV *, u_int32_t, u_int8_t **, size_t *));
+int __repmgr_refresh_membership __P((ENV *, u_int8_t *, size_t, u_int32_t));
 int __repmgr_reload_gmdb __P((ENV *));
 int __repmgr_gmdb_version_cmp __P((ENV *, u_int32_t, u_int32_t));
 int __repmgr_init_save __P((ENV *, DBT *));
@@ -238,10 +274,11 @@ void __repmgr_print_conn_err __P((ENV *, repmgr_netaddr_t *, int));
 int __repmgr_become_client __P((ENV *));
 REPMGR_SITE *__repmgr_lookup_site __P((ENV *, const char *, u_int));
 int __repmgr_find_site __P((ENV *, const char *, u_int, int *));
-int __repmgr_set_membership __P((ENV *, const char *, u_int, u_int32_t));
+int __repmgr_set_membership __P((ENV *, const char *, u_int, u_int32_t, u_int32_t));
 int __repmgr_bcast_parm_refresh __P((ENV *));
 int __repmgr_chg_prio __P((ENV *, u_int32_t, u_int32_t));
 int __repmgr_bcast_own_msg __P((ENV *, u_int32_t, u_int8_t *, size_t));
+int __repmgr_bcast_member_list __P((ENV *));
 
 #if defined(__cplusplus)
 }
diff --git a/src/dbinc_auto/sequence_ext.h b/src/dbinc_auto/sequence_ext.h
index a2c114cf..8f8b8473 100644
--- a/src/dbinc_auto/sequence_ext.h
+++ b/src/dbinc_auto/sequence_ext.h
@@ -10,6 +10,10 @@ int __seq_stat __P((DB_SEQUENCE *, DB_SEQUENCE_STAT **, u_int32_t));
 int __seq_stat_print __P((DB_SEQUENCE *, u_int32_t));
 const FN * __db_get_seq_flags_fn __P((void));
 const FN * __db_get_seq_flags_fn __P((void));
+int __seq_open __P((DB_SEQUENCE *, DB_TXN *, DBT *, u_int32_t));
+int __seq_initial_value  __P((DB_SEQUENCE *, db_seq_t));
+int __seq_get __P((DB_SEQUENCE *, DB_TXN *, u_int32_t,  db_seq_t *, u_int32_t));
+int __seq_close __P((DB_SEQUENCE *, u_int32_t));
 
 #if defined(__cplusplus)
 }
diff --git a/src/dbinc_auto/tcl_ext.h b/src/dbinc_auto/tcl_ext.h
index 8b076c8b..4ea037c0 100644
--- a/src/dbinc_auto/tcl_ext.h
+++ b/src/dbinc_auto/tcl_ext.h
@@ -19,9 +19,12 @@ int db_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*));
 int tcl_CompactStat __P((Tcl_Interp *, DBTCL_INFO *));
 int tcl_rep_send __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t));
 int dbc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*));
+int dbstream_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*));
 int env_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*));
 int tcl_EnvRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*));
 int tcl_EnvClose __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *));
+int tcl_EnvBackup __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
+int tcl_EnvDbBackup __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_EnvIdReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_EnvLsnReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_EnvVerbose __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *));
@@ -30,7 +33,8 @@ int tcl_EnvSetFlags __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *));
 int tcl_EnvTest __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_EnvGetEncryptFlags __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 void tcl_EnvSetErrfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *));
-void tcl_EnvSetMsgfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *));
+int tcl_EnvSetMsgfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *));
+int tcl_EnvCloseMsgfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *));
 int tcl_EnvSetErrpfx __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *));
 int tcl_EnvStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 DBTCL_INFO *_NewInfo __P((Tcl_Interp *, void *, char *, enum INFOTYPE));
@@ -73,9 +77,11 @@ int tcl_LogGet __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_LogPut __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_LogStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_LogStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
+int tcl_LogVerify __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int logc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*));
 int tcl_LogConfig __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *));
 int tcl_LogGetConfig __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *));
+int tcl_LogSetMax __P((Tcl_Interp *, DB_ENV *,Tcl_Obj *,u_int32_t *,u_int32_t *));
 void _MpInfoDelete __P((Tcl_Interp *, DBTCL_INFO *));
 int tcl_MpSync __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_MpTrickle __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
@@ -83,6 +89,7 @@ int tcl_Mp __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *));
 int tcl_MpStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_MpStatPrint __P((Tcl_Interp *, int,  Tcl_Obj * CONST*, DB_ENV *));
 int tcl_Mutex __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
+int tcl_MutexFailchkTimeout __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_MutFree __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 int tcl_MutGet __P((Tcl_Interp *, DB_ENV *, int));
 int tcl_MutLock __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
diff --git a/src/dbinc_auto/txn_ext.h b/src/dbinc_auto/txn_ext.h
index 7c21455f..2fbcd147 100644
--- a/src/dbinc_auto/txn_ext.h
+++ b/src/dbinc_auto/txn_ext.h
@@ -60,6 +60,7 @@ int __txn_recover __P((ENV *, DB_PREPLIST *, long, long *, u_int32_t));
 int __txn_get_prepared __P((ENV *, XID *, DB_PREPLIST *, long, long *, u_int32_t));
 int __txn_openfiles __P((ENV *, DB_THREAD_INFO *, DB_LSN *, int));
 int __txn_open __P((ENV *));
+int __txn_region_detach __P((ENV *, DB_TXNMGR *));
 int __txn_findlastckp __P((ENV *, DB_LSN *, DB_LSN *));
 int __txn_env_refresh __P((ENV *));
 u_int32_t __txn_region_mutex_count __P((ENV *));
@@ -67,7 +68,7 @@ u_int32_t __txn_region_mutex_max __P((ENV *));
 size_t __txn_region_size __P((ENV *));
 size_t __txn_region_max __P((ENV *));
 int __txn_id_set __P((ENV *, u_int32_t, u_int32_t));
-int __txn_oldest_reader __P((ENV *, DB_LSN *));
+int __txn_get_readers __P((ENV *, DB_LSN **, int *));
 int __txn_add_buffer __P((ENV *, TXN_DETAIL *));
 int __txn_remove_buffer __P((ENV *, TXN_DETAIL *, db_mutex_t));
 int __txn_stat_pp __P((DB_ENV *, DB_TXN_STAT **, u_int32_t));
diff --git a/src/dbreg/dbreg.c b/src/dbreg/dbreg.c
index 5067edac..99a80959 100644
--- a/src/dbreg/dbreg.c
+++ b/src/dbreg/dbreg.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/log.h"
 #include "dbinc/txn.h"
@@ -171,6 +172,7 @@ __dbreg_setup(dbp, fname, dname, create_txnid)
 		F_SET(fnp, DBREG_EXCL);
 	fnp->txn_ref = 1;
 	fnp->mutex = dbp->mutex;
+	fnp->blob_file_id = dbp->blob_file_id;
 
 	dbp->log_filename = fnp;
 
@@ -722,7 +724,7 @@ __dbreg_failchk(env)
 	MUTEX_LOCK(env, lp->mtx_filelist);
 	for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); fnp != NULL; fnp = nnp) {
 		nnp = SH_TAILQ_NEXT(fnp, q, __fname);
-		if (dbenv->is_alive(dbenv, 
+		if (dbenv->is_alive(dbenv,
 		    fnp->pid, unused, DB_MUTEX_PROCESS_ONLY))
 			continue;
 		MUTEX_LOCK(env, fnp->mutex);
@@ -773,6 +775,7 @@ __dbreg_log_close(env, fnp, txn, op)
 	DB_LOG *dblp;
 	DB_LSN r_unused;
 	int ret;
+	u_int32_t blob_file_lo, blob_file_hi;
 
 	dblp = env->lg_handle;
 	ret = 0;
@@ -788,10 +791,12 @@ __dbreg_log_close(env, fnp, txn, op)
 	memset(&fid_dbt, 0, sizeof(fid_dbt));
 	fid_dbt.data = fnp->ufid;
 	fid_dbt.size = DB_FILE_ID_LEN;
+	SET_LO_HI_VAR(fnp->blob_file_id, blob_file_lo, blob_file_hi);
 	if ((ret = __dbreg_register_log(env, txn, &r_unused,
 	    F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE,
 	    op, dbtp, &fid_dbt, fnp->id,
-	    fnp->s_type, fnp->meta_pgno, TXN_INVALID)) != 0) {
+	    fnp->s_type, fnp->meta_pgno, TXN_INVALID, blob_file_lo,
+	    blob_file_hi)) != 0) {
 		/*
 		 * We are trying to close, but the log write failed.
 		 * Unfortunately, close needs to plow forward, because
@@ -958,6 +963,7 @@ __dbreg_log_id(dbp, txn, id, needlock)
 	LOG *lp;
 	u_int32_t op;
 	int i, ret;
+	u_int32_t blob_file_lo, blob_file_hi;
 
 	env = dbp->env;
 	dblp = env->lg_handle;
@@ -996,14 +1002,16 @@ __dbreg_log_id(dbp, txn, id, needlock)
 	fid_dbt.size = DB_FILE_ID_LEN;
 
 	op = !F_ISSET(dbp, DB_AM_OPEN_CALLED) ? DBREG_PREOPEN :
-	    (F_ISSET(dbp, DB_AM_INMEM) ? 
+	    (F_ISSET(dbp, DB_AM_INMEM) ?
 	    (F2_ISSET(dbp, DB2_AM_EXCL) ? DBREG_XREOPEN : DBREG_REOPEN):
 	    (F2_ISSET(dbp, DB2_AM_EXCL) ? DBREG_XOPEN : DBREG_OPEN));
+	SET_LO_HI_VAR(fnp->blob_file_id, blob_file_lo, blob_file_hi);
 	ret = __dbreg_register_log(env, txn, &unused,
 	    F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0,
 	    op | F_ISSET(fnp, DB_FNAME_DBREG_MASK),
 	    r_name.size == 0 ? NULL : &r_name, &fid_dbt, id,
-	    fnp->s_type, fnp->meta_pgno, fnp->create_txnid);
+	    fnp->s_type, fnp->meta_pgno, fnp->create_txnid,
+	    blob_file_lo, blob_file_hi);
 
 	if (needlock)
 		MUTEX_UNLOCK(env, lp->mtx_filelist);
diff --git a/src/dbreg/dbreg.src b/src/dbreg/dbreg.src
index c7740d63..3187bc4f 100644
--- a/src/dbreg/dbreg.src
+++ b/src/dbreg/dbreg.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -26,7 +26,7 @@ INCLUDE
  * ftype: database type
  * id: transaction id of the subtransaction that created the fs object
  */
-BEGIN register		42	2
+BEGIN_COMPAT register		42	2
 DBOP	opcode		u_int32_t	lu
 DBT	name		DBT		s
 DBT	uid		DBT		s
@@ -35,3 +35,26 @@ ARG	ftype		DBTYPE		lx
 ARG	meta_pgno	db_pgno_t	lu
 ARG	id		u_int32_t	lx
 END
+
+/*
+ * Used for registering name/id translations at open or close.
+ * opcode: register or unregister
+ * name: file name
+ * uid: unique file id
+ * fileid: dbreg file id
+ * ftype: database type
+ * id: transaction id of the subtransaction that created the fs object
+ * blob_fid_lo/hi: The blob file directory id
+ */
+BEGIN register		61	2
+DBOP	opcode		u_int32_t	lu
+DBT	name		DBT		s
+DBT	uid		DBT		s
+ARG	fileid		int32_t		ld
+ARG	ftype		DBTYPE		lx
+ARG	meta_pgno	db_pgno_t	lu
+ARG	id		u_int32_t	lx
+ARG	blob_fid_lo	u_int32_t	lu
+ARG	blob_fid_hi	u_int32_t	lu
+END
+
diff --git a/src/dbreg/dbreg_auto.c b/src/dbreg/dbreg_auto.c
index a26e5527..3d9f01c7 100644
--- a/src/dbreg/dbreg_auto.c
+++ b/src/dbreg/dbreg_auto.c
@@ -8,6 +8,16 @@
 #include "dbinc/db_am.h"
 #include "dbinc/txn.h"
 
+DB_LOG_RECSPEC __dbreg_register_42_desc[] = {
+	{LOGREC_DBOP, SSZ(__dbreg_register_42_args, opcode), "opcode", ""},
+	{LOGREC_DBT, SSZ(__dbreg_register_42_args, name), "name", ""},
+	{LOGREC_DBT, SSZ(__dbreg_register_42_args, uid), "uid", ""},
+	{LOGREC_ARG, SSZ(__dbreg_register_42_args, fileid), "fileid", "%ld"},
+	{LOGREC_ARG, SSZ(__dbreg_register_42_args, ftype), "ftype", "%lx"},
+	{LOGREC_ARG, SSZ(__dbreg_register_42_args, meta_pgno), "meta_pgno", "%lu"},
+	{LOGREC_ARG, SSZ(__dbreg_register_42_args, id), "id", "%lx"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __dbreg_register_desc[] = {
 	{LOGREC_DBOP, SSZ(__dbreg_register_args, opcode), "opcode", ""},
 	{LOGREC_DBT, SSZ(__dbreg_register_args, name), "name", ""},
@@ -16,6 +26,8 @@ DB_LOG_RECSPEC __dbreg_register_desc[] = {
 	{LOGREC_ARG, SSZ(__dbreg_register_args, ftype), "ftype", "%lx"},
 	{LOGREC_ARG, SSZ(__dbreg_register_args, meta_pgno), "meta_pgno", "%lu"},
 	{LOGREC_ARG, SSZ(__dbreg_register_args, id), "id", "%lx"},
+	{LOGREC_ARG, SSZ(__dbreg_register_args, blob_fid_lo), "blob_fid_lo", "%lu"},
+	{LOGREC_ARG, SSZ(__dbreg_register_args, blob_fid_hi), "blob_fid_hi", "%lu"},
 	{LOGREC_Done, 0, "", ""}
 };
 /*
diff --git a/src/dbreg/dbreg_autop.c b/src/dbreg/dbreg_autop.c
index ea43addd..931bc2d9 100644
--- a/src/dbreg/dbreg_autop.c
+++ b/src/dbreg/dbreg_autop.c
@@ -10,6 +10,23 @@
 #include "dbinc/txn.h"
 
 /*
+ * PUBLIC: int __dbreg_register_42_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__dbreg_register_42_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__dbreg_register_42", __dbreg_register_42_desc, info));
+}
+
+/*
  * PUBLIC: int __dbreg_register_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
diff --git a/src/dbreg/dbreg_rec.c b/src/dbreg/dbreg_rec.c
index 1b387bb7..066efa03 100644
--- a/src/dbreg/dbreg_rec.c
+++ b/src/dbreg/dbreg_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
@@ -37,12 +37,16 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_am.h"
 #include "dbinc/txn.h"
 
 static int __dbreg_open_file __P((ENV *,
     DB_TXN *, __dbreg_register_args *, void *));
+static int __dbreg_register_recover_int
+    __P((ENV *, DBT *, db_recops, void *, __dbreg_register_args *));
+
 /*
  * PUBLIC: int __dbreg_register_recover
  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
@@ -56,21 +60,97 @@ __dbreg_register_recover(env, dbtp, lsnp, op, info)
 	void *info;
 {
 	__dbreg_register_args *argp;
+	int ret;
+
+	argp = NULL;
+
+	if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0)
+		goto out;
+
+	ret = __dbreg_register_recover_int(env, dbtp, op, info, argp);
+
+	if (ret == 0)
+		*lsnp = argp->prev_lsn;
+out:	if (argp != NULL)
+		__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __dbreg_register_42_recover
+ * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__dbreg_register_42_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__dbreg_register_42_args *argp;
+	__dbreg_register_args arg;
+	int ret;
+
+	argp = NULL;
+	if ((ret = __dbreg_register_42_read(env, dbtp->data, &argp)) != 0)
+		goto err;
+
+	/*
+	 * Convert to the current record form: copy the 42 fields (assumes
+	 * the 42 args struct is a layout prefix of the current one -- TODO
+	 * confirm) and zero the blob file id.  Pre-6.0 databases cannot
+	 * have blobs; 6.0 ones can, but the logged id is only used by
+	 * replication, which does not support blobs until 6.1.
+	 */
+	memcpy(&arg, argp, sizeof(__dbreg_register_42_args));
+	arg.blob_fid_lo = 0;
+	arg.blob_fid_hi = 0;
+
+	ret = __dbreg_register_recover_int(env, dbtp, op, info, &arg);
+
+	if (ret == 0)
+		*lsnp = argp->prev_lsn;
+err:	if (argp != NULL)
+		__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * Internal register recovery function for both the 42 log version and the
+ * 61 log version.
+ */
+static int
+__dbreg_register_recover_int(env, dbtp, op, info, argp)
+	ENV *env;
+	DBT *dbtp;
+	db_recops op;
+	void *info;
+	__dbreg_register_args *argp;
+{
 	DB_ENTRY *dbe;
 	DB_LOG *dblp;
 	DB *dbp;
 	u_int32_t opcode, status;
 	int do_close, do_open, do_rem, ret, t_ret;
+#ifdef	HAVE_REPLICATION
+	DB_REP *db_rep;
+	DELAYED_BLOB_LIST *dbl;
+	int view_partial;
+
+	dbl = NULL;
+#endif
 
 	dblp = env->lg_handle;
 	dbp = NULL;
+	ret = 0;
 
 #ifdef DEBUG_RECOVER
 	REC_PRINT(__dbreg_register_print);
+#else
+	COMPQUIET(dbtp, NULL);
 #endif
 	do_open = do_close = 0;
-	if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0)
-		goto out;
 
 	opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK);
 	switch (opcode) {
@@ -123,12 +203,54 @@ __dbreg_register_recover(env, dbtp, lsnp, op, info)
 	}
 
 	if (do_open) {
+#ifdef	HAVE_REPLICATION
+		/*
+		 * Partial replication may apply at this time.  Invoke
+		 * the callback if several conditions are met:
+		 * - We are a view.
+		 * - This is the OPENFILES pass of recovery.
+		 * - The file is not a BDB owned database.
+		 * - The dbreg operation is a create (id != TXN_INVALID).
+		 *
+		 * If the file is to be skipped, then we have to TXN_IGNORE
+		 * the txnlist for that create operation.
+		 */
+		if (IS_VIEW_SITE(env) && op == DB_TXN_OPENFILES &&
+		    (!IS_DB_FILE(argp->name.data) ||
+		    IS_BLOB_META(argp->name.data)) &&
+		    argp->id != TXN_INVALID) {
+			db_rep = env->rep_handle;
+			/*
+			 * Once a view, always a view.  Must have set
+			 * a callback already.
+			 */
+			if (db_rep->partial == NULL) {
+				__db_errx(env, DB_STR("1592",
+				    "Must set a view callback."));
+				ret = EINVAL;
+				goto out;
+			}
+			if ((ret = __rep_call_partial(env,
+			    argp->name.data, &view_partial, 0, &dbl)) != 0)
+				goto out;
+			DB_ASSERT(env, dbl == NULL);
+
+			/*
+			 * If this should not be replicated, then set
+			 * the child txnlist to TXN_IGNORE.
+			 */
+			if (view_partial == 0 &&
+			    (ret = __db_txnlist_update(env, info,
+			    argp->id, TXN_IGNORE, NULL, &status, 1)) != 0)
+				goto out;
+		}
+#endif
 		/*
 		 * We must open the db even if the meta page is not
 		 * yet written as we may be creating subdatabase.
 		 */
-		if (op == DB_TXN_OPENFILES && opcode != DBREG_CHKPNT
-		    && opcode != DBREG_XCHKPNT)
+		if (op == DB_TXN_OPENFILES && opcode != DBREG_CHKPNT &&
+		    opcode != DBREG_XCHKPNT)
 			F_SET(dblp, DBLOG_FORCE_OPEN);
 
 		/*
@@ -205,7 +327,7 @@ __dbreg_register_recover(env, dbtp, lsnp, op, info)
 			if (dbe->dbp == NULL && !dbe->deleted) {
 				/* No valid entry here. Nothing to do. */
 				MUTEX_UNLOCK(env, dblp->mtx_dbreg);
-				goto done;
+				goto out;
 			}
 
 			/* We have either an open entry or a deleted entry. */
@@ -273,11 +395,7 @@ __dbreg_register_recover(env, dbtp, lsnp, op, info)
 			}
 		}
 	}
-done:	if (ret == 0)
-		*lsnp = argp->prev_lsn;
-out:	if (argp != NULL)
-		__os_free(env, argp);
-	return (ret);
+out:	return (ret);
 }
 
 /*
@@ -296,11 +414,13 @@ __dbreg_open_file(env, txn, argp, info)
 	DB *dbp;
 	DB_ENTRY *dbe;
 	DB_LOG *dblp;
+	db_seq_t blob_file_id;
 	u_int32_t id, opcode, status;
 	int ret;
 
 	dblp = env->lg_handle;
 	opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK);
+	ret = 0;
 
 	/*
 	 * When we're opening, we have to check that the name we are opening
@@ -336,7 +456,7 @@ __dbreg_open_file(env, txn, argp, info)
 		 * bit and try to open it again.
 		 */
 		if ((dbp = dbe->dbp) != NULL) {
-			if (opcode == DBREG_REOPEN || 
+			if (opcode == DBREG_REOPEN ||
 			    opcode == DBREG_XREOPEN ||
 			    !F_ISSET(dbp, DB_AM_OPEN_CALLED) ||
 			    dbp->meta_pgno != argp->meta_pgno ||
@@ -393,7 +513,11 @@ reopen:
 		txn->mgrp = env->tx_handle;
 	}
 
-	return (__dbreg_do_open(env,
-	    txn, dblp, argp->uid.data, argp->name.data, argp->ftype,
-	    argp->fileid, argp->meta_pgno, info, argp->id, opcode));
+	GET_LO_HI(env,
+	    argp->blob_fid_lo, argp->blob_fid_hi, blob_file_id, ret);
+	if (ret != 0)
+		return (ret);
+	return (__dbreg_do_open(env, txn, dblp, argp->uid.data,
+	    argp->name.data, argp->ftype, argp->fileid,
+	    argp->meta_pgno, info, argp->id, opcode, blob_file_id));
 }
diff --git a/src/dbreg/dbreg_stat.c b/src/dbreg/dbreg_stat.c
index 6dfb3869..ad4bbdc2 100644
--- a/src/dbreg/dbreg_stat.c
+++ b/src/dbreg/dbreg_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/dbreg/dbreg_util.c b/src/dbreg/dbreg_util.c
index 80de4d91..0d483f93 100644
--- a/src/dbreg/dbreg_util.c
+++ b/src/dbreg/dbreg_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_am.h"
 #include "dbinc/fop.h"
@@ -103,6 +104,7 @@ __dbreg_log_files(env, opcode)
 	LOG *lp;
 	u_int32_t lopcode;
 	int ret;
+	u_int32_t blob_file_hi, blob_file_lo;
 
 	dblp = env->lg_handle;
 	lp = dblp->reginfo.primary;
@@ -137,11 +139,12 @@ __dbreg_log_files(env, opcode)
 		lopcode = opcode;
 		if ( opcode == DBREG_CHKPNT && F_ISSET(fnp, DBREG_EXCL))
 			lopcode = DBREG_XCHKPNT;
+		SET_LO_HI_VAR(fnp->blob_file_id, blob_file_lo, blob_file_hi);
 		if ((ret = __dbreg_register_log(env, NULL, &r_unused,
 		    F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE,
 		    lopcode | F_ISSET(fnp, DB_FNAME_DBREG_MASK),
 		    dbtp, &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno,
-		    TXN_INVALID)) != 0)
+		    TXN_INVALID, blob_file_lo, blob_file_hi)) != 0)
 			break;
 	}
 
@@ -429,7 +432,7 @@ __dbreg_id_to_db(env, txn, dbpp, ndx, tryopen)
 		if ((ret = __dbreg_do_open(env, txn, dblp,
 		    fname->ufid, name, fname->s_type, ndx, fname->meta_pgno,
 		    NULL, TXN_INVALID, F_ISSET(fname, DB_FNAME_INMEM) ?
-		    DBREG_REOPEN : DBREG_OPEN)) != 0)
+		    DBREG_REOPEN : DBREG_OPEN, fname->blob_file_id)) != 0)
 			return (ret);
 
 		*dbpp = dblp->dbentry[ndx].dbp;
@@ -540,6 +543,53 @@ __dbreg_fid_to_fname(dblp, fid, have_lock, fnamep)
 }
 
 /*
+ * __dbreg_blob_file_to_fname --
+ *	Traverse the shared-memory list of database file names, looking for
+ *	the entry that matches the passed blob file id.  Returns 0 and sets
+ *	*fnamep on a match; -1 if the id is 0 or no entry matches.
+ *
+ * PUBLIC: int __dbreg_blob_file_to_fname
+ * PUBLIC:	__P((DB_LOG *, db_seq_t, int, FNAME **));
+ */
+int
+__dbreg_blob_file_to_fname(dblp, blob_file_id, have_lock, fnamep)
+	DB_LOG *dblp;
+	db_seq_t blob_file_id;
+	int have_lock;
+	FNAME **fnamep;
+{
+	ENV *env;
+	FNAME *fnp;
+	LOG *lp;
+	int ret;
+
+	env = dblp->env;
+	lp = dblp->reginfo.primary;
+
+	ret = -1;
+
+	/*
+	 * If blob_file is 0 then blobs are not enabled and the value is not
+	 * unique.
+	 */
+	if (blob_file_id == 0)
+		return (ret);
+
+	if (!have_lock)
+		MUTEX_LOCK(env, lp->mtx_filelist);
+	SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname)
+		if (fnp->blob_file_id == blob_file_id) {
+			*fnamep = fnp;
+			ret = 0;
+			break;
+		}
+	if (!have_lock)
+		MUTEX_UNLOCK(env, lp->mtx_filelist);
+
+	return (ret);
+}
+
+/*
  * __dbreg_get_name
  *
  * Interface to get name of registered files.  This is mainly diagnostic
@@ -577,14 +627,14 @@ __dbreg_get_name(env, fid, fnamep, dnamep)
  * is not protected by the thread mutex.
  * PUBLIC: int __dbreg_do_open __P((ENV *,
  * PUBLIC:     DB_TXN *, DB_LOG *, u_int8_t *, char *, DBTYPE,
- * PUBLIC:     int32_t, db_pgno_t, void *, u_int32_t, u_int32_t));
+ * PUBLIC:     int32_t, db_pgno_t, void *, u_int32_t, u_int32_t, db_seq_t));
  */
 int
-__dbreg_do_open(env,
-    txn, lp, uid, name, ftype, ndx, meta_pgno, info, id, opcode)
+__dbreg_do_open(env, txn,
+    dblp, uid, name, ftype, ndx, meta_pgno, info, id, opcode, blob_file_id)
 	ENV *env;
 	DB_TXN *txn;
-	DB_LOG *lp;
+	DB_LOG *dblp;
 	u_int8_t *uid;
 	char *name;
 	DBTYPE ftype;
@@ -592,6 +642,7 @@ __dbreg_do_open(env,
 	db_pgno_t meta_pgno;
 	void *info;
 	u_int32_t id, opcode;
+	db_seq_t blob_file_id;
 {
 	DB *dbp;
 	u_int32_t cstat, ret_stat;
@@ -604,7 +655,7 @@ __dbreg_do_open(env,
 	try_inmem = 0;
 
 retry_inmem:
-	if ((ret = __db_create_internal(&dbp, lp->env, 0)) != 0)
+	if ((ret = __db_create_internal(&dbp, dblp->env, 0)) != 0)
 		return (ret);
 
 	/*
@@ -700,7 +751,7 @@ err:		if (cstat == TXN_UNEXPECTED)
 		 * handling those cases specially, above.
 		 */
 		if (try_inmem == 0 &&
-		    opcode != DBREG_PREOPEN && opcode != DBREG_REOPEN && 
+		    opcode != DBREG_PREOPEN && opcode != DBREG_REOPEN &&
 		    opcode != DBREG_XREOPEN) {
 			if ((ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0)
 				return (ret);
@@ -725,6 +776,7 @@ err:		if (cstat == TXN_UNEXPECTED)
 		 * we are closing a non-existent file and need to mark
 		 * it as deleted.
 		 */
+		dbp->blob_file_id = blob_file_id;
 		if (dbp->log_filename == NULL &&
 		    (ret = __dbreg_setup(dbp, name, NULL, id)) != 0)
 			return (ret);
@@ -736,7 +788,8 @@ not_right:
 		return (ret == 0 ? t_ret : ret);
 
 	/* Add this file as deleted. */
-	if ((t_ret = __dbreg_add_dbentry(env, lp, NULL, ndx)) != 0 && ret == 0)
+	if ((t_ret = __dbreg_add_dbentry(env, dblp, NULL, ndx)) != 0 &&
+	    ret == 0)
 		ret = t_ret;
 	return (ret);
 }
diff --git a/src/env/env_alloc.c b/src/env/env_alloc.c
index 700bfb27..9c8fd046 100644
--- a/src/env/env_alloc.c
+++ b/src/env/env_alloc.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/env/env_backup.c b/src/env/env_backup.c
index 9c79dbb4..2940f44b 100644
--- a/src/env/env_backup.c
+++ b/src/env/env_backup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/env/env_config.c b/src/env/env_config.c
index 57496909..56cebb63 100644
--- a/src/env/env_config.c
+++ b/src/env/env_config.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -84,8 +84,10 @@ static const CFG_DESC config_descs[] = {
     { "rep_set_clockskew",	CFG_2UINT,	__rep_set_clockskew	},
     { "rep_set_limit",		CFG_2UINT,	__rep_set_limit		},
     { "rep_set_nsites",		CFG_UINT,	__rep_set_nsites_pp	},
-    { "rep_set_priority",	CFG_UINT,	__rep_set_priority	},
+    { "rep_set_priority",	CFG_UINT,	__rep_set_priority_pp	},
     { "rep_set_request",	CFG_2UINT,	__rep_set_request	},
+    { "set_blob_dir",		CFG_STRING,	__env_set_blob_dir	},
+    { "set_blob_threshold",	CFG_2UINT,	__env_set_blob_threshold },
     { "set_cache_max",		CFG_2UINT,	__memp_set_cache_max	},
     { "set_create_dir",		CFG_STRING,	__env_set_create_dir	},
     { "set_data_dir",		CFG_STRING,	__env_set_data_dir	},
@@ -133,11 +135,16 @@ static const FN config_rep_config[] = {
 	{ DB_REP_CONF_AUTOROLLBACK,	"db_rep_conf_autorollback" },
 	{ DB_REP_CONF_BULK,		"db_rep_conf_bulk" },
 	{ DB_REP_CONF_DELAYCLIENT,	"db_rep_conf_delayclient" },
+	{ DB_REP_CONF_ELECT_LOGLENGTH,	"db_rep_conf_elect_loglength" },
 	{ DB_REP_CONF_INMEM,		"db_rep_conf_inmem" },
 	{ DB_REP_CONF_LEASE,		"db_rep_conf_lease" },
 	{ DB_REP_CONF_NOWAIT,		"db_rep_conf_nowait" },
 	{ DB_REPMGR_CONF_2SITE_STRICT,	"db_repmgr_conf_2site_strict" },
 	{ DB_REPMGR_CONF_ELECTIONS,	"db_repmgr_conf_elections" },
+	{ DB_REPMGR_CONF_PREFMAS_CLIENT,
+		"db_repmgr_conf_prefmas_client" },
+	{ DB_REPMGR_CONF_PREFMAS_MASTER,
+		"db_repmgr_conf_prefmas_master" },
 	{ 0, NULL }
 };
 
@@ -198,7 +205,9 @@ static const FN config_set_flags_forlog[] = {
 	{ DB_LOG_DIRECT,	"db_direct_log" },
 	{ DB_LOG_DSYNC,		"db_dsync_log" },
 	{ DB_LOG_AUTO_REMOVE,	"db_log_autoremove" },
+	{ DB_LOG_BLOB,		"db_log_blob" },
 	{ DB_LOG_IN_MEMORY,	"db_log_inmemory" },
+	{ DB_LOG_NOSYNC,	"db_log_nosync" },
 	{ 0, NULL }
 };
 
@@ -206,7 +215,9 @@ static const FN config_log_set_config[] = {
 	{ DB_LOG_DIRECT,	"db_log_direct" },
 	{ DB_LOG_DSYNC,		"db_log_dsync" },
 	{ DB_LOG_AUTO_REMOVE,	"db_log_auto_remove" },
+	{ DB_LOG_BLOB,		"db_log_blob" },
 	{ DB_LOG_IN_MEMORY,	"db_log_in_memory" },
+	{ DB_LOG_NOSYNC,	"db_log_nosync" },
 	{ DB_LOG_ZERO,		"db_log_zero" },
 	{ 0, NULL }
 };
@@ -237,6 +248,7 @@ static const FN config_set_verbose[] = {
 	{ DB_VERB_DEADLOCK,	"db_verb_deadlock" },
 	{ DB_VERB_FILEOPS,	"db_verb_fileops" },
 	{ DB_VERB_FILEOPS_ALL,	"db_verb_fileops_all" },
+	{ DB_VERB_MVCC,		"db_verb_mvcc" },
 	{ DB_VERB_RECOVERY,	"db_verb_recovery" },
 	{ DB_VERB_REGISTER,	"db_verb_register" },
 	{ DB_VERB_REPLICATION,	"db_verb_replication" },
@@ -462,7 +474,7 @@ format:		__db_errx(env, DB_STR_A("1584",
 		if ((lv1 = __db_name_to_val(config_rep_timeout, argv[1])) == -1)
 			goto format;
 		CFG_GET_UINT32(argv[2], &uv2);
-		return (__rep_set_timeout(dbenv, lv1, (db_timeout_t)uv2));
+		return (__rep_set_timeout_pp(dbenv, lv1, (db_timeout_t)uv2));
 	}
 
 	/* repmgr_set_ack_policy db_repmgr_acks_XXX */
@@ -475,6 +487,15 @@ format:		__db_errx(env, DB_STR_A("1584",
 		return (__repmgr_set_ack_policy(dbenv, lv1));
 	}
 
+	if (strcasecmp(argv[0], "repmgr_set_incoming_queue_max") == 0) {
+		if (nf != 3)
+			goto format;
+		CFG_GET_UINT32(argv[1], &uv1);
+		CFG_GET_UINT32(argv[2], &uv2);
+		return (__repmgr_set_incoming_queue_max(
+		    dbenv, (u_int32_t)uv1, (u_int32_t)uv2));
+	}
+
 	/*
 	 * Configure name/value pairs of config information for a site (local or
 	 * remote).
@@ -503,7 +524,7 @@ format:		__db_errx(env, DB_STR_A("1584",
 				uv2 = 0;
 			else
 				CFG_GET_UINT32(argv[i + 1], &uv2);
-			if ((ret = __repmgr_site_config(site,
+			if ((ret = __repmgr_site_config_int(site,
 			    (u_int32_t)lv1, (u_int32_t)uv2)) != 0)
 				break;
 		}
@@ -630,6 +651,15 @@ format:		__db_errx(env, DB_STR_A("1584",
 		    dbenv, DB_REGION_INIT, lv1 == 0 ? 0 : 1));
 	}
 
+	/* set_mutex_failchk_timeout <unsigned timeout> */
+	if (strcasecmp(argv[0], "set_mutex_failchk_timeout") == 0) {
+		if (nf != 2)
+			goto format;
+		CFG_GET_UINT32(argv[1], &uv1);
+		return (__env_set_timeout(
+		    dbenv, (u_int32_t)uv1, DB_SET_MUTEX_FAILCHK_TIMEOUT));
+	}
+
 	/* set_reg_timeout <unsigned timeout> */
 	if (strcasecmp(argv[0], "set_reg_timeout") == 0) {
 		if (nf != 2)
diff --git a/src/env/env_failchk.c b/src/env/env_failchk.c
index 05752f07..ad9bed0b 100644
--- a/src/env/env_failchk.c
+++ b/src/env/env_failchk.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -22,9 +22,26 @@ static int __env_in_api __P((ENV *));
 static void __env_clear_state __P((ENV *));
 
 /*
+ * With failchk broadcast, keep the first error and continue, to find them all;
+ * otherwise goto err at the first failure.  Each form is a single statement.
+ */
+#ifdef HAVE_FAILCHK_BROADCAST
+#define	FAILCHK_PROCESS_ERROR(t_ret, ret)	\
+	do { if ((t_ret) != 0 && (ret) == 0) (ret) = (t_ret); } while (0)
+#else
+#define	FAILCHK_PROCESS_ERROR(t_ret, ret)	\
+	do { if (((ret) = (t_ret)) != 0) goto err; } while (0)
+#endif
+
+/*
  * __env_failchk_pp --
  *	ENV->failchk pre/post processing.
  *
+ *	Single process failchk continues after recoverable failures but stops as
+ *	soon as recovery is required. Broadcast failchks continue even after
+ *	DB_RUNRECOVERY failures are detected, to maximize the possibility to
+ *	wake up processes blocked on dead resources, e.g. mutexes.
+ *
  * PUBLIC: int __env_failchk_pp __P((DB_ENV *, u_int32_t));
  */
 int
@@ -46,7 +63,7 @@ __env_failchk_pp(dbenv, flags)
 	 */
 	if (!ALIVE_ON(env)) {
 		__db_errx(env, DB_STR("1503",
-	"DB_ENV->failchk requires DB_ENV->is_alive be configured"));
+		    "DB_ENV->failchk requires DB_ENV->is_alive be configured"));
 		return (EINVAL);
 	}
 
@@ -59,10 +76,14 @@ __env_failchk_pp(dbenv, flags)
 	ENV_LEAVE(env, ip);
 	return (ret);
 }
+
 /*
  * __env_failchk_int --
  *	Process the subsystem failchk routines
  *
+ *	The FAILCHK_PROCESS_ERROR macro (defined at the top of this file)
+ *	differs between the broadcast and single process versions of failchk.
+ *
  * PUBLIC: int __env_failchk_int __P((DB_ENV *));
  */
 int
@@ -70,42 +91,52 @@ __env_failchk_int(dbenv)
 	DB_ENV *dbenv;
 {
 	ENV *env;
-	int ret;
+	int ret, t_ret;
 
 	env = dbenv->env;
+	ret = 0;
 	F_SET(dbenv, DB_ENV_FAILCHK);
 
 	/*
-	 * We check for dead threads in the API first as this would be likely
-	 * to hang other things we try later, like locks and transactions.
+	 * We check for dead threads in the API first as this would likely
+	 * hang other things we try later, like locks and transactions.
 	 */
-	if ((ret = __env_in_api(env)) != 0)
+	if ((ret = __env_in_api(env)) != 0) {
+		__db_err(env, ret, "__env_in_api");
 		goto err;
+	}
 
-	if (LOCKING_ON(env) && (ret = __lock_failchk(env)) != 0)
-		goto err;
+	if (LOCKING_ON(env) && (t_ret = __lock_failchk(env)) != 0)
+		FAILCHK_PROCESS_ERROR(t_ret, ret);
 
-	if (TXN_ON(env) &&
-	    ((ret = __txn_failchk(env)) != 0 ||
-	    (ret = __dbreg_failchk(env)) != 0))
-		goto err;
+	if (TXN_ON(env) && ret == 0 && ((t_ret = __txn_failchk(env)) != 0 ||
+	    (t_ret = __dbreg_failchk(env)) != 0))
+		FAILCHK_PROCESS_ERROR(t_ret, ret);
 
-	if ((ret = __memp_failchk(env)) != 0)
-		goto err;
+	if ((t_ret = __memp_failchk(env)) != 0)
+		FAILCHK_PROCESS_ERROR(t_ret, ret);
 
 #ifdef HAVE_REPLICATION_THREADS
-	if (REP_ON(env) && (ret = __repmgr_failchk(env)) != 0)
-		goto err;
+	if (REP_ON(env) && (t_ret = __repmgr_failchk(env)) != 0)
+		FAILCHK_PROCESS_ERROR(t_ret, ret);
 #endif
 
-	/* Mark any dead blocked threads as dead. */
-	__env_clear_state(env);
+err:
 
 #ifdef HAVE_MUTEX_SUPPORT
-	ret = __mut_failchk(env);
+	if ((t_ret = __mutex_failchk(env)) != 0 && ret == 0)
+		ret = t_ret;
 #endif
 
-err:	F_CLR(dbenv, DB_ENV_FAILCHK);
+	/* Any dead blocked thread slots are no longer needed; allow reuse. */
+	if (ret == 0)
+		__env_clear_state(env);
+	if (ret == DB_RUNRECOVERY) {
+		/* Announce a panic; avoid __env_panic()'s diag core dump. */
+		__env_panic_set(env, 1);
+		__env_panic_event(env, ret);
+	}
+	F_CLR(dbenv, DB_ENV_FAILCHK);
 	return (ret);
 }
 
@@ -312,7 +343,8 @@ __env_in_api(env)
 	REGINFO *infop;
 	THREAD_INFO *thread;
 	u_int32_t i;
-	int unpin, ret;
+	pid_t pid;
+	int unpin, ret, t_ret;
 
 	if ((htab = env->thr_hashtab) == NULL)
 		return (EINVAL);
@@ -322,10 +354,13 @@ __env_in_api(env)
 	renv = infop->primary;
 	thread = R_ADDR(infop, renv->thread_off);
 	unpin = 0;
+	ret = 0;
 
 	for (i = 0; i < env->thr_nbucket; i++)
 		SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+			pid = ip->dbth_pid;
 			if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE ||
+			    ip->dbth_state == THREAD_BLOCKED_DEAD ||
 			    (ip->dbth_state == THREAD_OUT &&
 			    thread->thr_count <  thread->thr_max))
 				continue;
@@ -341,26 +376,63 @@ __env_in_api(env)
 				ip->dbth_state = THREAD_SLOT_NOT_IN_USE;
 				continue;
 			}
-			return (__db_failed(env, DB_STR("1507",
+			/*
+			 * The above tests are not atomic, so it is possible that
+			 * the process pointed to by ip has changed during the tests.
+			 * In particular, if the process that ip pointed to when
+			 * is_alive was executed terminated normally, a new process may
+			 * reuse the same ip structure and change its dbth_state before
+			 * the next two tests were performed. Therefore, we need to test
+			 * here that all four tests above were done on the same process.
+			 * If the process pointed to by ip changed, all tests are invalid
+			 * and can be ignored.
+			 * Similarly, it's also possible for two processes to race to
+			 * change the dbth_state of the same ip structure. For example,
+			 * both process A and B reach the above test for the same
+			 * terminated process C where C's dbth_state is THREAD_OUT.
+			 * If A goes into the 'if' block and changes C's dbth_state to
+			 * THREAD_SLOT_NOT_IN_USE before B checks the condition, B
+			 * would incorrectly fail the test and run into this line.
+			 * Therefore, we need to check C's dbth_state again and fail
+			 * the db only if C's dbth_state is indeed THREAD_ACTIVE.
+			 */
+			if (ip->dbth_state != THREAD_ACTIVE || ip->dbth_pid != pid)
+				continue;
+			__os_gettime(env, &ip->dbth_failtime, 0);
+			t_ret = __db_failed(env, DB_STR("1507",
 			    "Thread died in Berkeley DB library"),
-			    ip->dbth_pid, ip->dbth_tid));
+			    ip->dbth_pid, ip->dbth_tid);
+			if (ret == 0)
+				ret = t_ret;
+			/*
+			 * Classic failchk stops after one dead thread in the
+			 * api, but broadcasting looks for all.
+			 */
+#ifndef HAVE_FAILCHK_BROADCAST
+			return (ret);
+#endif
 		}
 
 	if (unpin == 0)
-		return (0);
+		return (ret);
 
 	for (i = 0; i < env->thr_nbucket; i++)
 		SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info)
 			if (ip->dbth_state == THREAD_BLOCKED_DEAD &&
-			    (ret = __memp_unpin_buffers(env, ip)) != 0)
+			    (t_ret = __memp_unpin_buffers(env, ip)) != 0) {
+				if (ret == 0)
+					ret = t_ret;
+#ifndef HAVE_FAILCHK_BROADCAST
 				return (ret);
+#endif
+			}
 
-	return (0);
+	return (ret);
 }
 
 /*
  * __env_clear_state --
- *	Look for threads which died while blockedi and clear them..
+ *	Look for threads which died while blocked and clear them.
  */
 static void
 __env_clear_state(env)
@@ -441,6 +513,9 @@ __env_set_state(env, ipp, state)
 #endif
 	}
 
+	/* A failchk thread must not block on a lock -- that would be faulty. */
+	if (state == THREAD_BLOCKED && ip != NULL)
+		DB_ASSERT(env, ip->dbth_state != THREAD_FAILCHK);
 	/*
 	 * If ipp is not null,  return the thread control block if found.
 	 * Check to ensure the thread of control has been registered.
@@ -457,7 +532,9 @@ __env_set_state(env, ipp, state)
 
 	*ipp = NULL;
 	ret = 0;
-	if (ip == NULL) {
+	if (ip != NULL)
+		ip->dbth_state = state;
+	else {
 		infop = env->reginfo;
 		renv = infop->primary;
 		thread = R_ADDR(infop, renv->thread_off);
@@ -503,11 +580,13 @@ __env_set_state(env, ipp, state)
 init:			ip->dbth_pid = id.pid;
 			ip->dbth_tid = id.tid;
 			ip->dbth_state = state;
+			for (indx = 0; indx != MUTEX_STATE_MAX; indx++)
+				ip->dbth_latches[indx].mutex = MUTEX_INVALID;
 			SH_TAILQ_INIT(&ip->dbth_xatxn);
 		}
 		MUTEX_UNLOCK(env, renv->mtx_regenv);
-	} else
-		ip->dbth_state = state;
+	}
+
 	*ipp = ip;
 
 	DB_ASSERT(env, ret == 0);
@@ -535,7 +614,7 @@ __env_thread_id_string(dbenv, pid, tid, buf)
 #ifdef UINT64_FMT
 	char fmt[20];
 
-	snprintf(fmt, sizeof(fmt), "%s/%s", UINT64_FMT, UINT64_FMT);
+	snprintf(fmt, sizeof(fmt), "%s/%s", INT64_FMT, UINT64_FMT);
 	snprintf(buf,
 	    DB_THREADID_STRLEN, fmt, (u_int64_t)pid, (u_int64_t)(uintptr_t)tid);
 #else
diff --git a/src/env/env_file.c b/src/env/env_file.c
index b102404d..d6e29b21 100644
--- a/src/env/env_file.c
+++ b/src/env/env_file.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2002, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2002, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -12,7 +12,7 @@
 
 /*
  * __db_file_extend --
- *	Initialize a regular file by writing the last page of the file.
+ *	Initialize or extend a regular file by writing to its last page.
  *
  * PUBLIC: int __db_file_extend __P((ENV *, DB_FH *, size_t));
  */
@@ -27,7 +27,19 @@ __db_file_extend(env, fhp, size)
 	u_int32_t relative;
 	int ret;
 	char buf;
+#ifdef HAVE_MMAP_EXTEND
+	unsigned pagesize;
 
+	/*
+	 * Round up size to the VM pagesize. If it isn't aligned, then the bytes
+	 * ending the mapping might have no corresponding backing location on
+	 * disk, and could be silently lost when the process exits. [#23290]
+	 */
+	if (F_ISSET(fhp, DB_FH_REGION)) {
+		pagesize = (unsigned)getpagesize();
+		size = DB_ALIGN(size, pagesize);
+	}
+#endif
 	buf = '\0';
 	/*
 	 * Extend the file by writing the last page.  If the region is >4Gb,
diff --git a/src/env/env_globals.c b/src/env/env_globals.c
index 955e6738..2d665661 100644
--- a/src/env/env_globals.c
+++ b/src/env/env_globals.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -31,14 +31,21 @@ DB_GLOBALS __db_global_values = {
 
 	"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=", /* db_line */
 	{ 0 },				/* error_buf */
-	0,				/* uid_init */
-	0,				/* rand_next */
+	0,				/* random_seeded */
+#if defined(HAVE_RANDOM_R)
+	{ 0 },				/* random_r random_data */
+	{ 0 },				/* random_r state */
+#elif !defined(HAVE_RAND) && !defined(HAVE_RANDOM)
+	0,				/* rand/srand value */
+#endif
 	0,				/* fid_serial */
 	0,				/* db_errno */
-	0,				/* num_active_pids */
-	0,				/* size_active_pids */
-	NULL,                           /* active_pids */
 	NULL,                           /* saved_errstr */
+	"%m/%d %H:%M:%S",		/* strftime format for dates */
+#if defined(HAVE_ERROR_HISTORY)
+	0,				/* thread local msgs_key */
+	PTHREAD_ONCE_INIT,		/* pthread_once initializer */
+#endif
 	NULL,				/* j_assert */
 	NULL,				/* j_close */
 	NULL,				/* j_dirfree */
diff --git a/src/env/env_method.c b/src/env/env_method.c
index 63deacea..c246febc 100644
--- a/src/env/env_method.c
+++ b/src/env/env_method.c
@@ -1,9 +1,9 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
- * $Id: env_method.c,v dabaaeb7d839 2010/08/03 17:28:53 mike $
+ * $Id$
  */
 
 #include "db_config.h"
@@ -40,6 +40,7 @@ static int  __env_get_app_dispatch
 		__P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
 static int  __env_set_app_dispatch
 		__P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+static int  __env_get_blob_dir __P((DB_ENV *, const char **));
 static int __env_set_event_notify
 		__P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *)));
 static int  __env_get_feedback __P((DB_ENV *, void (**)(DB_ENV *, int, int)));
@@ -81,6 +82,11 @@ db_env_create(dbenvpp, flags)
 	if (flags != 0)
 		return (EINVAL);
 
+#ifdef HAVE_ERROR_HISTORY
+	/* Call thread local storage initializer at least once per process. */
+	__db_thread_init();
+#endif
+
 	/* Allocate the DB_ENV and ENV structures -- we always have both. */
 	if ((ret = __os_calloc(NULL, 1, sizeof(DB_ENV), &dbenv)) != 0)
 		return (ret);
@@ -159,7 +165,7 @@ __db_env_init(dbenv)
 	 */
 	/* DB_ENV PUBLIC HANDLE LIST BEGIN */
 	dbenv->add_data_dir = __env_add_data_dir;
-	dbenv->backup = __db_backup;
+	dbenv->backup = __db_backup_pp;
 	dbenv->dbbackup = __db_dbbackup_pp;
 	dbenv->cdsgroup_begin = __cdsgroup_begin_pp;
 	dbenv->close = __env_close_pp;
@@ -175,6 +181,8 @@ __db_env_init(dbenv)
 	dbenv->get_cachesize = __memp_get_cachesize;
 	dbenv->get_backup_callbacks = __env_get_backup_callbacks;
 	dbenv->get_backup_config = __env_get_backup_config;
+	dbenv->get_blob_dir = __env_get_blob_dir;
+	dbenv->get_blob_threshold = __env_get_blob_threshold_pp;
 	dbenv->get_create_dir = __env_get_create_dir;
 	dbenv->get_data_dirs = __env_get_data_dirs;
 	dbenv->get_data_len = __env_get_data_len;
@@ -269,7 +277,7 @@ __db_env_init(dbenv)
 	dbenv->open = __env_open_pp;
 	dbenv->remove = __env_remove;
 	dbenv->rep_elect = __rep_elect_pp;
-	dbenv->rep_flush = __rep_flush;
+	dbenv->rep_flush = __rep_flush_pp;
 	dbenv->rep_get_clockskew = __rep_get_clockskew;
 	dbenv->rep_get_config = __rep_get_config;
 	dbenv->rep_get_limit = __rep_get_limit;
@@ -282,29 +290,34 @@ __db_env_init(dbenv)
 	dbenv->rep_set_config = __rep_set_config;
 	dbenv->rep_set_limit = __rep_set_limit;
 	dbenv->rep_set_nsites = __rep_set_nsites_pp;
-	dbenv->rep_set_priority = __rep_set_priority;
+	dbenv->rep_set_priority = __rep_set_priority_pp;
 	dbenv->rep_set_request = __rep_set_request;
-	dbenv->rep_set_timeout = __rep_set_timeout;
+	dbenv->rep_set_timeout = __rep_set_timeout_pp;
 	dbenv->rep_set_transport = __rep_set_transport_pp;
+	dbenv->rep_set_view = __rep_set_view;
 	dbenv->rep_start = __rep_start_pp;
 	dbenv->rep_stat = __rep_stat_pp;
 	dbenv->rep_stat_print = __rep_stat_print_pp;
 	dbenv->rep_sync = __rep_sync;
 	dbenv->repmgr_channel = __repmgr_channel;
 	dbenv->repmgr_get_ack_policy = __repmgr_get_ack_policy;
+	dbenv->repmgr_get_incoming_queue_max = __repmgr_get_incoming_queue_max;
 	dbenv->repmgr_local_site = __repmgr_local_site;
 	dbenv->repmgr_msg_dispatch = __repmgr_set_msg_dispatch;
 	dbenv->repmgr_set_ack_policy = __repmgr_set_ack_policy;
+	dbenv->repmgr_set_incoming_queue_max = __repmgr_set_incoming_queue_max;
 	dbenv->repmgr_site = __repmgr_site;
 	dbenv->repmgr_site_by_eid = __repmgr_site_by_eid;
-	dbenv->repmgr_site_list = __repmgr_site_list;
-	dbenv->repmgr_start = __repmgr_start;
+	dbenv->repmgr_site_list = __repmgr_site_list_pp;
+	dbenv->repmgr_start = __repmgr_start_pp;
 	dbenv->repmgr_stat = __repmgr_stat_pp;
 	dbenv->repmgr_stat_print = __repmgr_stat_print_pp;
 	dbenv->set_alloc = __env_set_alloc;
 	dbenv->set_app_dispatch = __env_set_app_dispatch;
 	dbenv->set_backup_callbacks = __env_set_backup_callbacks;
 	dbenv->set_backup_config = __env_set_backup_config;
+	dbenv->set_blob_dir = __env_set_blob_dir;
+	dbenv->set_blob_threshold = __env_set_blob_threshold;
 	dbenv->set_cache_max = __memp_set_cache_max;
 	dbenv->set_cachesize = __memp_set_cachesize;
 	dbenv->set_create_dir = __env_set_create_dir;
@@ -370,10 +383,11 @@ __db_env_init(dbenv)
 	dbenv->thread_id = __os_id;
 	dbenv->thread_id_string = __env_thread_id_string;
 
+	dbenv->mutex_failchk_timeout = US_PER_SEC;
+
 	env = dbenv->env;
 	__os_id(NULL, &env->pid_cache, NULL);
 
-	env->db_ref = 0;
 	env->log_verify_wrap = __log_verify_wrap;
 	env->data_len = ENV_DEF_DATA_LEN;
 	TAILQ_INIT(&env->fdlist);
@@ -561,6 +575,97 @@ __env_get_memory_init(dbenv, type, countp)
 }
 
 /*
+ * __env_get_blob_threshold_pp --
+ *	DB_ENV->get_blob_threshold: report the environment's blob threshold.
+ *	Any data item larger than the threshold is saved as a blob file.
+ *
+ * PUBLIC: int  __env_get_blob_threshold_pp
+ * PUBLIC:         __P ((DB_ENV *, u_int32_t *));
+ */
+int
+__env_get_blob_threshold_pp(dbenv, bytes)
+	DB_ENV *dbenv;
+	u_int32_t *bytes;
+{
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	int ret;
+
+	env = dbenv->env;
+
+	ENV_ENTER(env, ip);
+	ret = __env_get_blob_threshold_int(env, bytes);
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * __env_get_blob_threshold_int --
+ *	Internal fetch of the environment's blob threshold (any data item
+ *	larger than the threshold is automatically saved as a blob file).
+ *
+ * PUBLIC: int  __env_get_blob_threshold_int
+ * PUBLIC:         __P ((ENV *, u_int32_t *));
+ */
+int
+__env_get_blob_threshold_int(env, bytes)
+	ENV *env;
+	u_int32_t *bytes;
+{
+	REGENV *renv;
+
+	/* ENV_OPEN_CALLED not set: report the value kept in the DB_ENV. */
+	if (!F_ISSET(env, ENV_OPEN_CALLED)) {
+		*bytes = env->dbenv->blob_threshold;
+		return (0);
+	}
+	/* Otherwise read the REGENV copy while holding mtx_regenv. */
+	renv = env->reginfo->primary;
+	MUTEX_LOCK(env, renv->mtx_regenv);
+	*bytes = renv->blob_threshold;
+	MUTEX_UNLOCK(env, renv->mtx_regenv);
+	return (0);
+}
+
+/*
+ * __env_set_blob_threshold --
+ *	Set the default blob threshold for the environment: any data item
+ *	larger than it is automatically saved as a blob file.
+ *
+ * PUBLIC: int  __env_set_blob_threshold __P((DB_ENV *, u_int32_t, u_int32_t));
+ */
+int
+__env_set_blob_threshold(dbenv, bytes, flags)
+	DB_ENV *dbenv;
+	u_int32_t bytes;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	REGENV *renv;
+
+	env = dbenv->env;
+	if (__db_fchk(env, "DB_ENV->set_blob_threshold", flags, 0) != 0)
+		return (EINVAL);
+
+	/* ENV_OPEN_CALLED not set: just record the value in the DB_ENV. */
+	if (!F_ISSET(env, ENV_OPEN_CALLED)) {
+		dbenv->blob_threshold = bytes;
+		return (0);
+	}
+
+	/* Otherwise update the REGENV copy while holding mtx_regenv. */
+	renv = env->reginfo->primary;
+	ENV_ENTER(env, ip);
+	MUTEX_LOCK(env, renv->mtx_regenv);
+	renv->blob_threshold = bytes;
+	MUTEX_UNLOCK(env, renv->mtx_regenv);
+	ENV_LEAVE(env, ip);
+	return (0);
+}
+
+/*
  * __env_set_memory_init --
  *	DB_ENV->set_memory_init.
  *
@@ -697,6 +802,43 @@ __env_set_app_dispatch(dbenv, app_dispatch)
 }
 
 /*
+ * __env_set_blob_dir --
+ *	DB_ENV->set_blob_dir: let the user override the default blob file
+ *	root directory.  Must be set if blobs are enabled and an unnamed
+ *	environment is created.  Not permitted after the environment is open.
+ *
+ * PUBLIC:  int  __env_set_blob_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_set_blob_dir(dbenv, dir)
+	DB_ENV *dbenv;
+	const char *dir;
+{
+	ENV *env;
+
+	env = dbenv->env;
+
+	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_blob_dir");
+	/* Release any earlier setting, then store a duplicate of dir. */
+	if (dbenv->db_blob_dir != NULL)
+		__os_free(env, dbenv->db_blob_dir);
+	return (__os_strdup(env, dir, &dbenv->db_blob_dir));
+}
+
+/*
+ * __env_get_blob_dir --
+ *	Return the blob file root directory (the stored pointer, not a copy).
+ */
+static int
+__env_get_blob_dir(dbenv, dirp)
+    DB_ENV *dbenv;
+    const char **dirp;
+{
+	*dirp = dbenv->db_blob_dir;
+	return (0);
+}
+
+/*
  * __env_get_encrypt_flags --
  *	{DB_ENV,DB}->get_encrypt_flags.
  *
@@ -1061,6 +1203,10 @@ __env_set_backup(env, on)
 		return (EINVAL);
 	}
 
+	/*
+	 * This code does not need env_rep_enter for the checkpoint because
+	 * it can only happen if there is an active bulk txn existing.
+	 */
 	if (needs_checkpoint && (ret = __txn_checkpoint(env, 0, 0, 0)))
 		return (ret);
 	return (0);
@@ -1244,6 +1390,11 @@ __env_set_data_len(dbenv, data_len)
 	DB_ENV *dbenv;
 	u_int32_t data_len;
 {
+	if (data_len == 0) {
+		__db_errx(dbenv->env, DB_STR("1593",
+"Maximum number of bytes to display for each key/data item can not be 0."));
+		return (EINVAL);
+	}
 
 	dbenv->env->data_len = data_len;
 	return (0);
@@ -1720,6 +1871,7 @@ __env_get_verbose(dbenv, which, onoffp)
 	case DB_VERB_DEADLOCK:
 	case DB_VERB_FILEOPS:
 	case DB_VERB_FILEOPS_ALL:
+	case DB_VERB_MVCC:
 	case DB_VERB_RECOVERY:
 	case DB_VERB_REGISTER:
 	case DB_VERB_REPLICATION:
@@ -1758,6 +1910,7 @@ __env_set_verbose(dbenv, which, on)
 	case DB_VERB_DEADLOCK:
 	case DB_VERB_FILEOPS:
 	case DB_VERB_FILEOPS_ALL:
+	case DB_VERB_MVCC:
 	case DB_VERB_RECOVERY:
 	case DB_VERB_REGISTER:
 	case DB_VERB_REPLICATION:
@@ -1888,9 +2041,15 @@ __env_get_timeout(dbenv, timeoutp, flags)
 	int ret;
 
 	ret = 0;
-	if (flags == DB_SET_REG_TIMEOUT) {
+	if (flags == DB_SET_REG_TIMEOUT)
 		*timeoutp = dbenv->envreg_timeout;
-	} else
+	else if (flags == DB_SET_MUTEX_FAILCHK_TIMEOUT)
+#ifdef HAVE_FAILCHK_BROADCAST
+		*timeoutp = dbenv->mutex_failchk_timeout;
+#else
+		ret = USR_ERR(dbenv->env, DB_OPNOTSUP);
+#endif
+	else
 		ret = __lock_get_env_timeout(dbenv, timeoutp, flags);
 	return (ret);
 }
@@ -1912,6 +2071,12 @@ __env_set_timeout(dbenv, timeout, flags)
 	ret = 0;
 	if (flags == DB_SET_REG_TIMEOUT)
 		dbenv->envreg_timeout = timeout;
+	else if (flags == DB_SET_MUTEX_FAILCHK_TIMEOUT)
+#ifdef HAVE_FAILCHK_BROADCAST
+		dbenv->mutex_failchk_timeout = timeout;
+#else
+		ret = USR_ERR(dbenv->env, DB_OPNOTSUP);
+#endif
 	else
 		ret = __lock_set_env_timeout(dbenv, timeout, flags);
 	return (ret);
diff --git a/src/env/env_name.c b/src/env/env_name.c
index a3a0b371..d0dd5635 100644
--- a/src/env/env_name.c
+++ b/src/env/env_name.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 
 static int __db_fullpath
     __P((ENV *, const char *, const char *, int, int, char **));
@@ -122,7 +123,7 @@ __db_appname(env, appname, file, dirp, namep)
 {
 	DB_ENV *dbenv;
 	char **ddp;
-	const char *dir;
+	const char *blob_dir, *dir;
 	int ret;
 
 	dbenv = env->dbenv;
@@ -141,6 +142,8 @@ __db_appname(env, appname, file, dirp, namep)
 	/*
 	 * DB_APP_NONE:
 	 *      DB_HOME/file
+	 * DB_APP_BLOB:
+	 *      DB_HOME/DB_BLOB_DIR/file
 	 * DB_APP_DATA:
 	 *      DB_HOME/DB_DATA_DIR/file
 	 * DB_APP_LOG:
@@ -151,6 +154,12 @@ __db_appname(env, appname, file, dirp, namep)
 	switch (appname) {
 	case DB_APP_NONE:
 		break;
+	case DB_APP_BLOB:
+		if (dbenv != NULL && dbenv->db_blob_dir != NULL)
+			dir = dbenv->db_blob_dir;
+		else
+			dir = BLOB_DEFAULT_DIR;
+		break;
 	case DB_APP_RECOVER:
 	case DB_APP_DATA:
 		/*
@@ -164,6 +173,13 @@ __db_appname(env, appname, file, dirp, namep)
 		/* Second, look in the environment home directory. */
 		DB_CHECKFILE(file, NULL, 1, 0, namep, dirp);
 
+		/* Third, check the blob directory. */
+		if (dbenv != NULL && dbenv->db_blob_dir != NULL)
+			blob_dir = dbenv->db_blob_dir;
+		else
+			blob_dir = BLOB_DEFAULT_DIR;
+		DB_CHECKFILE(file, blob_dir, 1, 0, namep, dirp);
+
 		/*
 		 * Otherwise, we're going to create.  Use the specified
 		 * directory unless we're in recovery and it doesn't exist.
diff --git a/src/env/env_open.c b/src/env/env_open.c
index 7eddca3a..85189369 100644
--- a/src/env/env_open.c
+++ b/src/env/env_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -107,10 +107,16 @@ __env_open_pp(dbenv, db_home, flags, mode)
 		__db_errx(env, DB_STR("1589", "DB_PRIVATE is not "
 			    "supported by 64-bit applications in "
 			    "mixed-size-addressing mode"));
-			return (EINVAL);
-		}
+		return (EINVAL);
+	}
 #endif
 
+	if (LF_ISSET(DB_PRIVATE) && PREFMAS_IS_SET(env)) {
+		__db_errx(env, DB_STR("1594", "DB_PRIVATE is not "
+		    "supported in Replication Manager preferred master mode"));
+		return (EINVAL);
+	}
+
 	return (__env_open(dbenv, db_home, flags, mode));
 }
 
@@ -129,12 +135,20 @@ __env_open(dbenv, db_home, flags, mode)
 {
 	DB_THREAD_INFO *ip;
 	ENV *env;
-	u_int32_t orig_flags;
-	int register_recovery, ret, t_ret;
+	u_int32_t orig_flags, retry_flags;
+	int recovery_failed, register_recovery, ret, t_ret;
+	char *old_passwd;
+	size_t old_passwd_len;
+	u_int32_t old_encrypt_flags;
 
 	ip = NULL;
 	env = dbenv->env;
+	recovery_failed = 1;
 	register_recovery = 0;
+	retry_flags = 0;
+	old_passwd = NULL;
+	old_passwd_len = 0;
+	old_encrypt_flags = 0;
 
 	/* Initial configuration. */
 	if ((ret = __env_config(dbenv, db_home, &flags, mode)) != 0)
@@ -171,13 +185,27 @@ __env_open(dbenv, db_home, flags, mode)
 			dbenv->is_alive = __envreg_isalive;
 		}
 
-		if ((ret =
-		    __envreg_register(env, &register_recovery, flags)) != 0)
+		/*
+		 * Back up the current key, because it would be consumed by
+		 * __envreg_register below.
+		 */
+		if (dbenv->passwd != NULL) {
+			if ((ret = __os_strdup(env, dbenv->passwd, &old_passwd)) != 0)
+				goto err;
+			old_passwd_len = dbenv->passwd_len;
+			(void)__env_get_encrypt_flags(dbenv, &old_encrypt_flags);
+		}
+
+		F_SET(dbenv, DB_ENV_NOPANIC);
+		ret = __envreg_register(env, &register_recovery, flags);
+		dbenv->flags = orig_flags;
+		if (ret != 0)
 			goto err;
 		if (register_recovery) {
 			if (!LF_ISSET(DB_RECOVER)) {
 				__db_errx(env, DB_STR("1567",
 	    "The DB_RECOVER flag was not specified, and recovery is needed"));
+				recovery_failed = 0;
 				ret = DB_RUNRECOVERY;
 				goto err;
 			}
@@ -197,16 +225,27 @@ __env_open(dbenv, db_home, flags, mode)
 	 * want to remove files left over for any reason, from any session.
 	 */
 retry:	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))
+		if (
 #ifdef HAVE_REPLICATION
-		if ((ret = __rep_reset_init(env)) != 0 ||
-		    (ret = __env_remove_env(env)) != 0 ||
-#else
-		if ((ret = __env_remove_env(env)) != 0 ||
+		    (ret = __rep_reset_init(env)) != 0 ||
 #endif
-		    (ret = __env_refresh(dbenv, orig_flags, 0)) != 0)
+		    (ret = __env_remove_env(env)) != 0 ||
+		    (ret = __env_refresh(dbenv,
+		    orig_flags | retry_flags, 0)) != 0)
 			goto err;
 
-	if ((ret = __env_attach_regions(dbenv, flags, orig_flags, 1)) != 0)
+	/* Restore the database key; the "retry" path re-enters this code. */
+	if (LF_ISSET(DB_REGISTER) && old_passwd != NULL) {
+		ret = __env_set_encrypt(dbenv, old_passwd, old_encrypt_flags);
+		memset(old_passwd, 0xff, old_passwd_len - 1);
+		__os_free(env, old_passwd);
+		old_passwd = NULL;	/* Never reuse or re-free the copy. */
+		if (ret != 0)
+			goto err;
+	}
+	DB_ASSERT(env, ret == 0);
+	if ((ret = __env_attach_regions(dbenv,
+	    flags, orig_flags | retry_flags, 1)) != 0)
 		goto err;
 
 	/*
@@ -216,8 +255,18 @@ retry:	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))
 	 */
 	if (LF_ISSET(DB_FAILCHK) && !register_recovery) {
 		ENV_ENTER(env, ip);
-		if ((ret = __env_failchk_int(dbenv)) != 0)
+		/*
+		 * Set the thread state so that any waiting for a potentially
+		 * dead thread will call is_alive() in order to avoid hanging.
+		 */
+		FAILCHK_THREAD(env, ip);
+		ret = __env_failchk_int(dbenv);
+		if (ret != 0) {
+			__db_err(env, ret,
+			    DB_STR("1595",
+			    "failchk crash after clean registry"));
 			goto err;
+		}
 		ENV_LEAVE(env, ip);
 	}
 
@@ -230,12 +279,12 @@ err:	if (ret != 0)
 		 * processes can now proceed.
 		 *
 		 * If recovery failed, unregister now and let another process
-		 * clean up.
+		 * clean up and run recovery.
 		 */
 		if (ret == 0 && (t_ret = __envreg_xunlock(env)) != 0)
 			ret = t_ret;
 		if (ret != 0)
-			(void)__envreg_unregister(env, 1);
+			(void)__envreg_unregister(env, recovery_failed);
 	}
 
 	/*
@@ -247,7 +296,11 @@ err:	if (ret != 0)
 	 */
 	if (ret == DB_RUNRECOVERY && !register_recovery &&
 	    !LF_ISSET(DB_RECOVER) && LF_ISSET(DB_REGISTER)) {
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+			__db_msg(env, DB_STR("1596",
+		"env_open DB_REGISTER w/o RECOVER panic: trying w/recovery"));
 		LF_SET(DB_RECOVER);
+		retry_flags = DB_ENV_NOPANIC;
 		goto retry;
 	}
 
@@ -304,6 +357,9 @@ __env_open_arg(dbenv, flags)
 			    "replication requires transaction support"));
 			return (EINVAL);
 		}
+		if ((ret =
+		    __log_set_config_int(dbenv, DB_LOG_BLOB, 1, 1)) != 0)
+			return (ret);
 	}
 	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
 		if ((ret = __db_fcchk(env,
@@ -349,30 +405,6 @@ __env_open_arg(dbenv, flags)
 	}
 #endif
 
-#ifdef HAVE_MUTEX_FCNTL
-	/*
-	 * !!!
-	 * We need a file descriptor for fcntl(2) locking.  We use the file
-	 * handle from the REGENV file for this purpose.
-	 *
-	 * Since we may be using shared memory regions, e.g., shmget(2), and
-	 * not a mapped-in regular file, the backing file may be only a few
-	 * bytes in length.  So, this depends on the ability to call fcntl to
-	 * lock file offsets much larger than the actual physical file.  I
-	 * think that's safe -- besides, very few systems actually need this
-	 * kind of support, SunOS is the only one still in wide use of which
-	 * I'm aware.
-	 *
-	 * The error case is if an application lacks spinlocks and wants to be
-	 * threaded.  That doesn't work because fcntl will lock the underlying
-	 * process, including all its threads.
-	 */
-	if (F_ISSET(env, ENV_THREAD)) {
-		__db_errx(env, DB_STR("1578",
-    "architecture lacks fast mutexes: applications cannot be threaded"));
-		return (EINVAL);
-	}
-#endif
 	return (ret);
 }
 
@@ -506,7 +538,7 @@ __env_close_pp(dbenv, flags)
 {
 	DB_THREAD_INFO *ip;
 	ENV *env;
-	int rep_check, ret, t_ret;
+	int ret, t_ret;
 	u_int32_t close_flags, flags_orig;
 
 	env = dbenv->env;
@@ -517,65 +549,75 @@ __env_close_pp(dbenv, flags)
 	 * Validate arguments, but as a DB_ENV handle destructor, we can't
 	 * fail.
 	 */
-	if (flags != 0 && flags != DB_FORCESYNC &&
-	    (t_ret = __db_ferr(env, "DB_ENV->close", 0)) != 0 && ret == 0)
-		ret = t_ret;
+#undef	OKFLAGS
+#define	OKFLAGS	(DB_FORCESYNC | DB_FORCESYNCENV)
+
+	ret = __db_fchk(env, "DB_ENV->close", flags, OKFLAGS);
 
 #define	DBENV_FORCESYNC		0x00000001
 #define	DBENV_CLOSE_REPCHECK	0x00000010
-	if (flags == DB_FORCESYNC)
+	if (LF_ISSET(DB_FORCESYNC))
 		close_flags |= DBENV_FORCESYNC;
+	if (LF_ISSET(DB_FORCESYNCENV))
+		F_SET(env, ENV_FORCESYNCENV);
+
+	/*
+	 * Call __env_close() to clean up resources even though the open
+	 * didn't fully succeed.
+	 */
+	if (!F_ISSET(env, ENV_OPEN_CALLED))
+		goto do_close;
 
 	/*
 	 * If the environment has panic'd, all we do is try and discard
 	 * the important resources.
 	 */
 	if (PANIC_ISSET(env)) {
+		/*
+		 * Temporarily set no panic so we do not trigger the
+		 * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwrite thus
+		 * allowing the unregister to happen correctly.
+		 */
+		flags_orig = dbenv->flags;
+		F_SET(dbenv, DB_ENV_NOPANIC);
+		ENV_ENTER(env, ip);
 		/* clean up from registry file */
-		if (dbenv->registry != NULL) {
-			/*
-			 * Temporarily set no panic so we do not trigger the
-			 * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwr
-			 * thus allowing the unregister to happen correctly.
-			 */
-			flags_orig = F_ISSET(dbenv, DB_ENV_NOPANIC);
-			F_SET(dbenv, DB_ENV_NOPANIC);
+		if (dbenv->registry != NULL)
 			(void)__envreg_unregister(env, 0);
-			dbenv->registry = NULL;
-			if (!flags_orig)
-				F_CLR(dbenv, DB_ENV_NOPANIC);
-		}
 
 		/* Close all underlying threads and sockets. */
-		if (IS_ENV_REPLICATED(env))
-			(void)__repmgr_close(env);
+		(void)__repmgr_close(env);
 
 		/* Close all underlying file handles. */
 		(void)__file_handle_cleanup(env);
+		ENV_LEAVE(env, ip);
+
+		dbenv->flags = flags_orig;
+		(void)__env_region_cleanup(env);
 
-		PANIC_CHECK(env);
+		return (__env_panic_msg(env));
 	}
 
 	ENV_ENTER(env, ip);
 
-	rep_check = IS_ENV_REPLICATED(env) ? 1 : 0;
-	if (rep_check) {
 #ifdef HAVE_REPLICATION_THREADS
-		/*
-		 * Shut down Replication Manager threads first of all.  This
-		 * must be done before __env_rep_enter to avoid a deadlock that
-		 * could occur if repmgr's background threads try to do a rep
-		 * operation that needs __rep_lockout.
-		 */
-		if ((t_ret = __repmgr_close(env)) != 0 && ret == 0)
-			ret = t_ret;
+	/*
+	 * Shut down Replication Manager threads first of all.  This
+	 * must be done before __env_rep_enter to avoid a deadlock that
+	 * could occur if repmgr's background threads try to do a rep
+	 * operation that needs __rep_lockout.
+	 */
+	if ((t_ret = __repmgr_close(env)) != 0 && ret == 0)
+		ret = t_ret;
 #endif
+	if (IS_ENV_REPLICATED(env)) {
 		if ((t_ret = __env_rep_enter(env, 0)) != 0 && ret == 0)
 			ret = t_ret;
+		if (t_ret == 0)	/* This enter worked, whatever ret holds. */
+			close_flags |= DBENV_CLOSE_REPCHECK;
 	}
 
-	if (rep_check)
-		close_flags |= DBENV_CLOSE_REPCHECK;
+do_close:
 	if ((t_ret = __env_close(dbenv, close_flags)) != 0 && ret == 0)
 		ret = t_ret;
 
@@ -640,8 +682,11 @@ __env_close(dbenv, flags)
 			t_ret = dbp->alt_close(dbp, close_flags);
 		else
 			t_ret = __db_close(dbp, NULL, close_flags);
-		if (t_ret != 0 && ret == 0)
-			ret = t_ret;
+		if (t_ret != 0) {
+			if (ret == 0)
+				ret = t_ret;
+			break;
+		}
 	}
 
 	/*
@@ -661,10 +706,8 @@ __env_close(dbenv, flags)
 #endif
 
 	/* If we're registered, clean up. */
-	if (dbenv->registry != NULL) {
+	if (dbenv->registry != NULL)
 		(void)__envreg_unregister(env, 0);
-		dbenv->registry = NULL;
-	}
 
 	/* Check we've closed all underlying file handles. */
 	if ((t_ret = __file_handle_cleanup(env)) != 0 && ret == 0)
@@ -680,6 +723,9 @@ __env_close(dbenv, flags)
 	if (dbenv->db_md_dir != NULL)
 		__os_free(env, dbenv->db_md_dir);
 	dbenv->db_md_dir = NULL;
+	if (dbenv->db_blob_dir != NULL)
+		__os_free(env, dbenv->db_blob_dir);
+	dbenv->db_blob_dir = NULL;
 	if (dbenv->db_data_dir != NULL) {
 		for (p = dbenv->db_data_dir; *p != NULL; ++p)
 			__os_free(env, *p);
@@ -761,9 +807,7 @@ __env_refresh(dbenv, orig_flags, rep_check)
 			ret = t_ret;
 	}
 
-	/* Discard the DB_ENV, ENV handle mutexes. */
-	if ((t_ret = __mutex_free(env, &dbenv->mtx_db_env)) != 0 && ret == 0)
-		ret = t_ret;
+	/* Discard the ENV handle mutex. */
 	if ((t_ret = __mutex_free(env, &env->mtx_env)) != 0 && ret == 0)
 		ret = t_ret;
 
@@ -936,17 +980,38 @@ __file_handle_cleanup(env)
 	ENV *env;
 {
 	DB_FH *fhp;
+	DB_MPOOL *dbmp;
+	u_int i;
 
-	if (TAILQ_FIRST(&env->fdlist) == NULL)
+	if (TAILQ_EMPTY(&env->fdlist))
 		return (0);
 
-	__db_errx(env, DB_STR("1581",
-	    "File handles still open at environment close"));
+	__db_errx(env,
+	    DB_STR("1581", "File handles still open at environment close"));
 	while ((fhp = TAILQ_FIRST(&env->fdlist)) != NULL) {
-		__db_errx(env, DB_STR_A("1582", "Open file handle: %s", "%s"),
-		    fhp->name);
-		(void)__os_closehandle(env, fhp);
+		__db_errx(env,
+		    DB_STR_A("1582", "Open file handle: %s", "%s"), fhp->name);
+		if (__os_closehandle(env, fhp) != 0)
+			break;
 	}
+	if (env->lockfhp != NULL)
+		env->lockfhp = NULL;
+	/* Invalidate saved pointers to the regions' files: all are closed. */
+	if (env->reginfo != NULL)
+		env->reginfo->fhp = NULL;
+	if (env->lg_handle != NULL)
+		env->lg_handle->reginfo.fhp = NULL;
+	if (env->lk_handle != NULL)
+		env->lk_handle->reginfo.fhp = NULL;
+#ifdef HAVE_MUTEX_SUPPORT
+	if (env->mutex_handle != NULL)
+		env->mutex_handle->reginfo.fhp = NULL;
+#endif
+	if (env->tx_handle != NULL)
+		env->tx_handle->reginfo.fhp = NULL;
+	if ((dbmp = env->mp_handle) != NULL && dbmp->reginfo != NULL)
+		for (i = 0; i < env->dbenv->mp_ncache; ++i)
+			dbmp->reginfo[i].fhp = NULL;
 	return (EINVAL);
 }
 
@@ -1109,11 +1174,9 @@ __env_attach_regions(dbenv, flags, orig_flags, retry_ok)
 		goto err;
 
 	/*
-	 * Initialize the handle mutexes.
+	 * Initialize the handle mutex.
 	 */
 	if ((ret = __mutex_alloc(env,
-	    MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_db_env)) != 0 ||
-	    (ret = __mutex_alloc(env,
 	    MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &env->mtx_env)) != 0)
 		goto err;
 
@@ -1125,8 +1188,15 @@ __env_attach_regions(dbenv, flags, orig_flags, retry_ok)
 		goto err;
 
 	rep_check = IS_ENV_REPLICATED(env) ? 1 : 0;
-	if (rep_check && (ret = __env_rep_enter(env, 0)) != 0)
+	if (rep_check && (ret = __env_rep_enter(env, 0)) != 0) {
+		/*
+		 * If we get an error we didn't increment handle_cnt,
+		 * so we don't want to decrement it later.  Turn off
+		 * rep_check here.
+		 */
+		rep_check = 0;
 		goto err;
+	}
 
 	if (LF_ISSET(DB_INIT_MPOOL)) {
 		if ((ret = __memp_open(env, create_ok)) != 0)
diff --git a/src/env/env_recover.c b/src/env/env_recover.c
index 9636554a..fb7ddee7 100644
--- a/src/env/env_recover.c
+++ b/src/env/env_recover.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -18,17 +18,15 @@
 #include "dbinc/qam.h"
 #include "dbinc/txn.h"
 
-#ifndef lint
-static const char copyright[] =
-    "Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.\n";
-#endif
-
 static int	__db_log_corrupt __P((ENV *, DB_LSN *));
 static int	__env_init_rec_42 __P((ENV *));
 static int	__env_init_rec_43 __P((ENV *));
 static int	__env_init_rec_46 __P((ENV *));
 static int	__env_init_rec_47 __P((ENV *));
 static int	__env_init_rec_48 __P((ENV *));
+static int	__env_init_rec_53 __P((ENV *));
+static int	__env_init_rec_60 __P((ENV *));
+static int	__env_init_rec_60p1 __P((ENV *));
 static int	__log_earliest __P((ENV *, DB_LOGC *, int32_t *, DB_LSN *));
 
 static double	__lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int));
@@ -632,6 +630,12 @@ err:	if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0)
 
 	dbenv->tx_timestamp = 0;
 
+	/*
+	 * Failure means that the env has panicked. Disable locking so that the
+	 * env can close without its mutexes calls causing additional panics.
+	 */
+	if (ret != 0)
+		F_SET(env->dbenv, DB_ENV_NOLOCKING);
 	F_CLR(env->lg_handle, DBLOG_RECOVER);
 	F_CLR(region, TXN_IN_RECOVERY);
 
@@ -690,7 +694,8 @@ __lsn_diff(low, high, current, max, is_forward)
  * is trying to sync up with a master whose max LSN is less than this
  * client's max lsn; we want to roll back everything after that.
  *
- * Find the latest checkpoint whose ckp_lsn is less than the max lsn.
+ * Find the latest checkpoint less than or equal to max lsn and
+ * return the ckp_lsn from that checkpoint.
  */
 static int
 __log_backup(env, logc, max_lsn, start_lsn)
@@ -713,10 +718,11 @@ __log_backup(env, logc, max_lsn, start_lsn)
 			return (ret);
 		/*
 		 * Follow checkpoints through the log until
-		 * we find one with a ckp_lsn less than
-		 * or equal max_lsn.
+		 * we find one less than or equal to max_lsn.
+		 * Then return the ckp_lsn from that checkpoint as it
+		 * is our earliest outstanding txn needed.
 		 */
-		if (LOG_COMPARE(&ckp_args->ckp_lsn, max_lsn) <= 0) {
+		if (LOG_COMPARE(&lsn, max_lsn) <= 0) {
 			*start_lsn = ckp_args->ckp_lsn;
 			break;
 		}
@@ -727,7 +733,7 @@ __log_backup(env, logc, max_lsn, start_lsn)
 		 * done.  Break with DB_NOTFOUND.
 		 */
 		if (IS_ZERO_LSN(lsn)) {
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 			break;
 		}
 		__os_free(env, ckp_args);
@@ -880,6 +886,9 @@ __db_log_corrupt(env, lsnp)
 /*
  * __env_init_rec --
  *
+ *	Install recover functions in the environment. Whenever this is updated,
+ *	corresponding changes are needed by db_printlog's env_init_print().
+ *
  * PUBLIC: int __env_init_rec __P((ENV *, u_int32_t));
  */
 int
@@ -924,6 +933,29 @@ __env_init_rec(env, version)
 	 * oldest revision that applies must be used.  Therefore we override
 	 * the recovery functions in reverse log version order.
 	 */
+	if (version == DB_LOGVERSION)
+		goto done;
+
+	/* DB_LOGVERSION_61 added the blob file id to the dbreg logs. */
+	if (version > DB_LOGVERSION_60p1)
+		goto done;
+	if ((ret = __env_init_rec_60p1(env)) != 0)
+		goto err;
+
+	/*
+	 * DB_LOGVERSION_60p1 changed the two u_int32_t offset fields in the
+	 * log for fop_write_file into a single int64.
+	 */
+	if (version > DB_LOGVERSION_60)
+		goto done;
+	if ((ret = __env_init_rec_60(env)) != 0)
+		goto err;
+
+	/* DB_LOGVERSION_53 changed the heap addrem log record. */
+	if (version > DB_LOGVERSION_53)
+		goto done;
+	if ((ret = __env_init_rec_53(env)) != 0)
+		goto err;
 	/*
 	 * DB_LOGVERSION_53 is a strict superset of DB_LOGVERSION_50.
 	 * So, only check > DB_LOGVERSION_48p2.  If/When log records are
@@ -931,6 +963,8 @@ __env_init_rec(env, version)
 	 */
 	if (version > DB_LOGVERSION_48p2)
 		goto done;
+	if (version >= DB_LOGVERSION_50)
+		goto done;
 	if ((ret = __env_init_rec_48(env)) != 0)
 		goto err;
 	/*
@@ -1091,3 +1125,77 @@ __env_init_rec_48(env)
 err:
 	return (ret);
 }
+
+/*
+ * __env_init_rec_53 --
+ *	Add __heap_addrem_50_recover to env->recover_dtab when HAVE_HEAP is
+ *	defined; otherwise there is nothing to add and 0 is returned.
+ */
+static int
+__env_init_rec_53(env)
+	ENV *env;
+{
+#ifdef HAVE_HEAP
+	return (__db_add_recovery_int(env, &env->recover_dtab,
+	    __heap_addrem_50_recover, DB___heap_addrem_50));
+#else
+	/* The parameter is otherwise unused in this configuration. */
+	COMPQUIET(env, NULL);
+	return (0);
+#endif
+}
+
+/*
+ * __env_init_rec_60 --
+ *	Add the "_60" variants of the fop recovery functions to
+ *	env->recover_dtab.  Returns the first non-zero result from
+ *	__db_add_recovery_int, or 0 if every addition succeeded.
+ */
+static int
+__env_init_rec_60(env)
+	ENV *env;
+{
+	int ret;
+
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_create_60_recover, DB___fop_create_60)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_remove_60_recover, DB___fop_remove_60)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_rename_60_recover, DB___fop_rename_60)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_rename_noundo_60_recover, DB___fop_rename_noundo_60)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_file_remove_60_recover, DB___fop_file_remove_60)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_write_60_recover, DB___fop_write_60)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __fop_write_file_60_recover, DB___fop_write_file_60)) != 0)
+		return (ret);
+
+	return (0);
+}
+
+/* __env_init_rec_60p1 -- add the dbreg_register_42 and heap_addrem_60 hooks. */
+static int
+__env_init_rec_60p1(env)
+	ENV *env;
+{
+	int ret;
+
+	ret = __db_add_recovery_int(env, &env->recover_dtab,
+	    __dbreg_register_42_recover, DB___dbreg_register_42);
+#ifdef HAVE_HEAP
+	/* The heap function is added only if the first addition worked. */
+	if (ret == 0)
+		ret = __db_add_recovery_int(env, &env->recover_dtab,
+		    __heap_addrem_60_recover, DB___heap_addrem_60);
+#endif
+	return (ret);
+}
diff --git a/src/env/env_region.c b/src/env/env_region.c
index 113bea21..cf7085b7 100644
--- a/src/env/env_region.c
+++ b/src/env/env_region.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -90,8 +90,11 @@ loop:	renv = NULL;
 	 * it's actually a creation or not, and we'll have to fall-back to a
 	 * join if it's not a create.
 	 */
-	if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL)
+	if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL) {
+		DB_DEBUG_MSG(env, "env_attach: creating %s",
+		    F_ISSET(env, ENV_PRIVATE) ? "private" : "user map func");
 		goto creation;
+	}
 
 	/*
 	 * Try to create the file, if we have the authority.  We have to ensure
@@ -179,14 +182,15 @@ loop:	renv = NULL;
 	 * something in the region file other than meta-data and that
 	 * shouldn't happen.
 	 */
-	if (size < sizeof(ref))
+	if (size < sizeof(ref)) {
+		DB_DEBUG_MSG(env, "region size %d is too small", (int)size);
 		goto retry;
-	else {
+	} else {
 
 		if (size == sizeof(ref))
 			F_SET(env, ENV_SYSTEM_MEM);
 		else if (F_ISSET(env, ENV_SYSTEM_MEM)) {
-			ret = EINVAL;
+			ret = USR_ERR(env, EINVAL);
 			__db_err(env, ret, DB_STR_A("1535",
 		    "%s: existing environment not created in system memory",
 			    "%s"), infop->name);
@@ -197,6 +201,7 @@ loop:	renv = NULL;
 			    nrw < (size_t)sizeof(rbuf) ||
 			    (ret = __os_seek(env,
 			    env->lockfhp, 0, 0, rbuf.region_off)) != 0) {
+				ret = USR_ERR(env, ret);
 				__db_err(env, ret, DB_STR_A("1536",
 				     "%s: unable to read region info", "%s"),
 				     infop->name);
@@ -207,7 +212,8 @@ loop:	renv = NULL;
 		if ((ret = __os_read(env, env->lockfhp, &ref,
 		    sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) {
 			if (ret == 0)
-				ret = EIO;
+				ret = USR_ERR(env, EIO);
+			(void)USR_ERR(env, ret);
 			__db_err(env, ret, DB_STR_A("1537",
 			    "%s: unable to read system-memory information",
 			    "%s"), infop->name);
@@ -218,18 +224,16 @@ loop:	renv = NULL;
 		segid = ref.segid;
 	}
 
-#ifndef HAVE_MUTEX_FCNTL
 	/*
-	 * If we're not doing fcntl locking, we can close the file handle.  We
-	 * no longer need it and the less contact between the buffer cache and
-	 * the VM, the better.
+	 * We no longer need the file handle; the less contact between the
+	 * buffer cache and the VM, the better.
 	 */
 	(void)__os_closehandle(env, env->lockfhp);
 	 env->lockfhp = NULL;
-#endif
 
 	/* Call the region join routine to acquire the region. */
 	memset(&tregion, 0, sizeof(tregion));
+	tregion.type = REGION_TYPE_ENV;
 	tregion.size = (roff_t)size;
 	tregion.max = (roff_t)max;
 	tregion.segid = segid;
@@ -257,15 +261,15 @@ user_map_functions:
 	    "Program version %d.%d doesn't match environment version %d.%d",
 			    "%d %d %d %d"), DB_VERSION_MAJOR, DB_VERSION_MINOR,
 			    renv->majver, renv->minver);
-			ret = DB_VERSION_MISMATCH;
+			ret = USR_ERR(env, DB_VERSION_MISMATCH);
 		} else
-			ret = EINVAL;
+			ret = USR_ERR(env, EINVAL);
 		goto err;
 	}
 	if (renv->signature != signature) {
 		__db_errx(env, DB_STR("1539",
 		    "Build signature doesn't match environment"));
-		ret = DB_VERSION_MISMATCH;
+		ret = USR_ERR(env, DB_VERSION_MISMATCH);
 		goto err;
 	}
 
@@ -287,8 +291,16 @@ user_map_functions:
 		ret = __env_panic_msg(env);
 		goto err;
 	}
-	if (renv->magic != DB_REGION_MAGIC)
+	if (renv->magic != DB_REGION_MAGIC) {
+		DB_DEBUG_MSG(env,
+		    "attach sees bad region magic 0x%lx", (u_long)renv->magic);
 		goto retry;
+	}
+
+	if (dbenv->blob_threshold != 0 &&
+	    renv->blob_threshold != dbenv->blob_threshold)
+		__db_msg(env, DB_STR("1591",
+"Warning: Ignoring blob_threshold size when joining environment"));
 
 	/*
 	 * Get a reference to the underlying REGION information for this
@@ -329,7 +341,7 @@ user_map_functions:
 		if (*init_flagsp != 0) {
 			__db_errx(env, DB_STR("1540",
     "configured environment flags incompatible with existing environment"));
-			ret = EINVAL;
+			ret = USR_ERR(env, EINVAL);
 			goto err;
 		}
 		*init_flagsp = renv->init_flags;
@@ -437,6 +449,8 @@ creation:
 	renv->minver = (u_int32_t)minver;
 	renv->patchver = (u_int32_t)patchver;
 	renv->signature = signature;
+	renv->failure_panic = 0;
+	renv->failure_symptom[0] = '\0';
 
 	(void)time(&renv->timestamp);
 	__os_unique_id(env, &renv->envid);
@@ -447,6 +461,8 @@ creation:
 	 */
 	renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp;
 
+	renv->blob_threshold = dbenv->blob_threshold;
+
 	/*
 	 * Set up the region array.  We use an array rather than a linked list
 	 * as we have to traverse this list after failure in some cases, and
@@ -513,17 +529,14 @@ find_err:	__db_errx(env, DB_STR_A("1544",
 		}
 	}
 
-#ifndef HAVE_MUTEX_FCNTL
 	/*
-	 * If we're not doing fcntl locking, we can close the file handle.  We
-	 * no longer need it and the less contact between the buffer cache and
-	 * the VM, the better.
+	 * We no longer need the file handle; the less contact between the
+	 * buffer cache and the VM, the better.
 	 */
 	if (env->lockfhp != NULL) {
 		 (void)__os_closehandle(env, env->lockfhp);
 		 env->lockfhp = NULL;
 	}
-#endif
 
 	/* Everything looks good, we're done. */
 	env->reginfo = infop;
@@ -550,7 +563,7 @@ retry:	/* Close any open file handle. */
 		(void)__env_sys_detach(env,
 		    infop, F_ISSET(infop, REGION_CREATE));
 
-		if (rp != NULL && F_ISSET(env, DB_PRIVATE))
+		if (rp != NULL && F_ISSET(env, ENV_PRIVATE))
 			__env_alloc_free(infop, rp);
 	}
 
@@ -674,8 +687,23 @@ __env_panic_set(env, on)
 	ENV *env;
 	int on;
 {
-	if (env != NULL && env->reginfo != NULL)
-		((REGENV *)env->reginfo->primary)->panic = on ? 1 : 0;
+	REGENV *renv;
+
+	if (env != NULL && env->reginfo != NULL) {
+		/*
+		 * Remember it in the process' env as well, so that the
+		 * panic-ness is still known on exit from the final close.
+		 */
+		renv = env->reginfo->primary;
+		if (on) {
+			F_SET(env, ENV_REMEMBER_PANIC);
+			if (F_ISSET(env->dbenv, DB_ENV_FAILCHK))
+				renv->failure_panic = 1;
+		}
+		else
+			F_CLR(env, ENV_REMEMBER_PANIC);
+		renv->panic = on ? 1 : 0;
+	}
 }
 
 /*
@@ -775,6 +803,31 @@ __env_ref_get(dbenv, countp)
 }
 
 /*
+ * __env_region_cleanup --
+ *	Detach from the env's regions, discarding errors, e.g., after a panic.
+ *
+ * PUBLIC: int __env_region_cleanup __P((ENV *));
+ */
+int
+__env_region_cleanup(env)
+	ENV *env;
+{
+	if (env->reginfo != NULL) {
+#ifdef HAVE_MUTEX_SUPPORT /* NOTE(review): also gates lock detach; confirm. */
+		(void)__lock_region_detach(env, env->lk_handle);
+		(void)__mutex_region_detach(env, env->mutex_handle);
+#endif /* HAVE_MUTEX_SUPPORT */
+		(void)__log_region_detach(env, env->lg_handle);
+		(void)__memp_region_detach(env, env->mp_handle);
+		(void)__txn_region_detach(env, env->tx_handle);
+		(void)__env_detach(env, 0);
+		/* Flag the handle so the panic is still known after detaching. */
+		F_SET(env, ENV_REMEMBER_PANIC);
+	}
+	return (0);
+}
+
+/*
  * __env_detach --
  *	Detach from the environment.
  *
@@ -796,9 +849,7 @@ __env_detach(env, destroy)
 
 	/* Close the locking file handle. */
 	if (env->lockfhp != NULL) {
-		if ((t_ret =
-		    __os_closehandle(env, env->lockfhp)) != 0 && ret == 0)
-			ret = t_ret;
+		ret = __os_closehandle(env, env->lockfhp);
 		env->lockfhp = NULL;
 	}
 
@@ -1249,13 +1300,13 @@ __env_sys_attach(env, infop, rp)
 		__db_errx(env, DB_STR_A("1548",
 		    "region size %lu is too large; maximum is %lu", "%lu %lu"),
 		    (u_long)rp->size, (u_long)DB_REGIONSIZE_MAX);
-		return (EINVAL);
+		return (USR_ERR(env, EINVAL));
 	}
 	if (rp->max > DB_REGIONSIZE_MAX) {
 		__db_errx(env, DB_STR_A("1549",
 		    "region max %lu is too large; maximum is %lu", "%lu %lu"),
 		    (u_long)rp->max, (u_long)DB_REGIONSIZE_MAX);
-		return (EINVAL);
+		return (USR_ERR(env, EINVAL));
 	}
 #endif
 
@@ -1281,7 +1332,7 @@ __env_sys_attach(env, infop, rp)
 "architecture does not support locks inside process-local (malloc) memory"));
 			__db_errx(env, DB_STR("1551",
 	    "application may not specify both DB_PRIVATE and DB_THREAD"));
-			return (EINVAL);
+			return (USR_ERR(env, EINVAL));
 		}
 #endif
 		if ((ret = __os_malloc(
@@ -1310,7 +1361,7 @@ __env_sys_attach(env, infop, rp)
 		    "region memory was not correctly aligned"));
 		(void)__env_sys_detach(env, infop,
 		    F_ISSET(infop, REGION_CREATE));
-		return (EINVAL);
+		return (USR_ERR(env, EINVAL));
 	}
 
 	return (0);
@@ -1402,7 +1453,7 @@ __env_des_get(env, env_infop, infop, rpp)
 	 * the region, fail.  The caller generates any error message.
 	 */
 	if (!F_ISSET(infop, REGION_CREATE_OK))
-		return (ENOENT);
+		return (USR_ERR(env, ENOENT));
 
 	/*
 	 * If we didn't find a region and don't have room to create the region
@@ -1411,7 +1462,7 @@ __env_des_get(env, env_infop, infop, rpp)
 	if (empty_slot == NULL) {
 		__db_errx(env, DB_STR("1553",
 		    "no room remaining for additional REGIONs"));
-		return (ENOENT);
+		return (USR_ERR(env, ENOENT));
 	}
 
 	/*
diff --git a/src/env/env_register.c b/src/env/env_register.c
index 7475444d..731ddd1f 100644
--- a/src/env/env_register.c
+++ b/src/env/env_register.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -30,6 +30,7 @@
 static	int __envreg_add __P((ENV *, int *, u_int32_t));
 static	int __envreg_pid_compare __P((const void *, const void *));
 static	int __envreg_create_active_pid __P((ENV *, char *));
+static	int __envreg_add_active_pid __P((ENV*, char *));
 
 /*
  * Support for portable, multi-process database environment locking, based on
@@ -137,7 +138,7 @@ __envreg_register(env, need_recoveryp, flags)
 
 	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
 		__db_msg(env, DB_STR_A("1524",
-	"%lu: register environment", "%lu"), (u_long)pid);
+		    "%lu: register environment", "%lu"), (u_long)pid);
 
 	/* Build the path name and open the registry file. */
 	if ((ret = __db_appname(env,
@@ -176,7 +177,6 @@ __envreg_register(env, need_recoveryp, flags)
 	/* Register this process. */
 	if ((ret = __envreg_add(env, need_recoveryp, flags)) != 0)
 		goto err;
-
 	/*
 	 * Release our exclusive lock if we don't need to run recovery.  If
 	 * we need to run recovery, ENV->open will call back into register
@@ -186,8 +186,7 @@ __envreg_register(env, need_recoveryp, flags)
 		goto err;
 
 	if (0) {
-err:		*need_recoveryp = 0;
-
+err:
 		/*
 		 * !!!
 		 * Closing the file handle must release all of our locks.
@@ -196,7 +195,6 @@ err:		*need_recoveryp = 0;
 			(void)__os_closehandle(env, dbenv->registry);
 		dbenv->registry = NULL;
 	}
-
 	if (pp != NULL)
 		__os_free(env, pp);
 
@@ -222,11 +220,11 @@ __envreg_add(env, need_recoveryp, flags)
 	size_t nr, nw;
 	u_int lcnt;
 	u_int32_t bytes, mbytes, orig_flags;
-	int need_recovery, ret, t_ret;
+	int need_failchk, ret, t_ret;
 	char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10];
 
 	dbenv = env->dbenv;
-	need_recovery = 0;
+	need_failchk = t_ret = 0;
 	COMPQUIET(dead, 0);
 	COMPQUIET(p, NULL);
 	ip = NULL;
@@ -269,7 +267,7 @@ kill_all:	/*
 		 * registering.
 		 */
 		if (nr != PID_LEN) {
-			need_recovery = 1;
+			need_failchk = 1;
 			break;
 		}
 
@@ -299,7 +297,7 @@ kill_all:	/*
 		}
 
 #if DB_ENVREG_KILL_ALL
-		if (need_recovery) {
+		if (need_failchk) {
 			pid = (pid_t)strtoul(buf, NULL, 10);
 			(void)kill(pid, SIGKILL);
 
@@ -318,7 +316,7 @@ kill_all:	/*
 				__db_msg(env, DB_STR_A("1530",
 				    "%02u: %s: FAILED", "%02u %s"), lcnt, p);
 
-			need_recovery = 1;
+			need_failchk = 1;
 			dead = pos;
 #if DB_ENVREG_KILL_ALL
 			goto kill_all;
@@ -331,16 +329,27 @@ kill_all:	/*
 				    "%02u: %s: LOCKED", "%02u %s"), lcnt, p);
 	}
 
+	/* Check for a panic; if so there's no need to call failchk. */
+	if (__env_attach(env, NULL, 0, 0) != 0)
+		goto sig_proc;
+	infop = env->reginfo;
+	renv = infop->primary;
+	*need_recoveryp = renv->panic != 0;
+	(void)__env_detach(env, 0);
+	if (*need_recoveryp)
+		return (0);
+
 	/*
-	 * If we have to perform recovery...
+	 * If we have to perform failchk...
 	 *
 	 * Mark all slots empty.  Registry ignores empty slots we can't lock,
 	 * so it doesn't matter if any of the processes are in the middle of
 	 * exiting Berkeley DB -- they'll discard their lock when they exit.
 	 */
-	if (need_recovery) {
+	if (need_failchk) {
 		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
-			__db_msg(env, "%lu: recovery required", (u_long)pid);
+			__db_msg(env,
+			    "%lu: failchk recovery required", (u_long)pid);
 
 		if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) {
 			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
@@ -352,13 +361,14 @@ kill_all:	/*
 				    env, pid_buf)) != 0)
 					goto sig_proc;
 
-			/* The environment will already exist, so we do not
+			/*
+			 * The environment will already exist, so we do not
 			 * want DB_CREATE set, nor do we want any recovery at
 			 * this point.  No need to put values back as flags is
 			 * passed in by value.  Save original dbenv flags in
 			 * case we need to recover/remove existing environment.
 			 * Set DB_ENV_FAILCHK before attach to help ensure we
-			 * dont block on a mutex held by the dead process.
+			 * don't block on a mutex held by the dead process.
 			 */
 			LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL);
 			orig_flags = dbenv->flags;
@@ -367,44 +377,53 @@ kill_all:	/*
 			if ((ret = __env_attach_regions(
 			    dbenv, flags, orig_flags, 0)) != 0)
 				goto sig_proc;
-			if ((t_ret =
-			    __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 &&
-			    ret == 0)
+			if ((t_ret = __env_set_state(env,
+			   &ip, THREAD_FAILCHK)) != 0 && ret == 0)
 				ret = t_ret;
-			if ((t_ret =
-			    __env_failchk_int(dbenv)) != 0 && ret == 0)
+			if (ret == 0 && (t_ret = __env_failchk_int(dbenv)) != 0)
 				ret = t_ret;
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env,
+				    "%lu: failchk returned %d, ret is %d",
+				    (u_long)pid, t_ret, ret);
 
 			/* Free active pid array if used. */
 			if (LF_ISSET(DB_FAILCHK_ISALIVE)) {
-				DB_GLOBAL(num_active_pids) = 0;
-				DB_GLOBAL(size_active_pids) = 0;
-				__os_free( env, DB_GLOBAL(active_pids));
+				env->num_active_pids = 0;
+				env->size_active_pids = 0;
+				__os_free(env, env->active_pids);
+				env->active_pids = NULL;
 			}
 
 			/* Detach from environment and deregister thread. */
-			if ((t_ret =
-			    __env_refresh(dbenv, orig_flags, 0)) != 0 &&
-			    ret == 0)
+			if ((t_ret = __env_refresh(dbenv,
+			    orig_flags, 0)) != 0 && ret == 0)
 				ret = t_ret;
+			F_CLR(env, ENV_OPEN_CALLED);
+
 			if (ret == 0) {
 				if ((ret = __os_seek(env, dbenv->registry,
-				    0, 0,(u_int32_t)dead)) != 0 ||
+				    0, 0, (u_int32_t)dead)) != 0 ||
 				    (ret = __os_write(env, dbenv->registry,
 				    PID_EMPTY, PID_LEN, &nw)) != 0)
 					return (ret);
-				need_recovery = 0;
+				need_failchk = 0;
 				goto add;
 			}
 
 		}
 		/* If we can't attach, then we cannot set DB_REGISTER panic. */
-sig_proc:	if (__env_attach(env, NULL, 0, 0) == 0) {
+sig_proc:
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+			__db_msg(env, "%lu: sig_proc attaching errs %s/ret %s",
+			    (u_long)pid, db_strerror(t_ret), db_strerror(ret));
+		if (__env_attach(env, NULL, 0, 0) == 0) {
 			infop = env->reginfo;
 			renv = infop->primary;
-			/* Indicate DB_REGSITER panic.  Also, set environment
-			 * panic as this is the panic trigger mechanism in
-			 * the code that everything looks for.
+			/*
+			 * Indicate DB_REGISTER panic.  Also, set (or re-set)
+			 * environment panic as this is the panic trigger
+			 * mechanism in the code that everything looks for.
 			 */
 			renv->reg_panic = 1;
 			renv->panic = 1;
@@ -484,7 +503,7 @@ add:	if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
 		}
 	}
 
-	if (need_recovery)
+	if (need_failchk)
 		*need_recoveryp = 1;
 
 	return (ret);
@@ -543,8 +562,9 @@ __envreg_unregister(env, recovery_failed)
 	 * also releasing our slot lock, we could race.  That can't happen, I
 	 * don't think.
 	 */
-err:	if ((t_ret =
-	    __os_closehandle(env, dbenv->registry)) != 0 && ret == 0)
+err:
+	if (dbenv->registry != NULL &&
+	    (t_ret = __os_closehandle(env, dbenv->registry)) != 0 && ret == 0)
 		ret = t_ret;
 
 	dbenv->registry = NULL;
@@ -610,6 +630,10 @@ __envreg_isalive(dbenv, pid, tid, flags )
 	db_threadid_t tid;
 	u_int32_t flags;
 {
+	ENV *env;
+
+	env = dbenv->env;
+
 	/* in this case we really do not care about tid, simply for lint */
 	DB_THREADID_INIT(tid);
 
@@ -617,15 +641,14 @@ __envreg_isalive(dbenv, pid, tid, flags )
 	if (!((flags == 0) || (flags == DB_MUTEX_PROCESS_ONLY)))
 		return (EINVAL);
 
-	if (DB_GLOBAL(active_pids) == NULL ||
-	    DB_GLOBAL(num_active_pids) == 0 || dbenv == NULL)
+	if (env->active_pids == NULL || env->num_active_pids == 0)
 		return (0);
 	/*
 	 * bsearch returns a pointer to an entry in active_pids if a match
 	 * is found on pid, else no match found it returns NULL.   This
 	 * routine will return a 1 if a match is found, else a 0.
 	 */
-	if (bsearch(&pid, DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+	if (bsearch(&pid, env->active_pids, env->num_active_pids,
 	    sizeof(pid_t), __envreg_pid_compare))
 		return 1;
 
@@ -635,7 +658,8 @@ __envreg_isalive(dbenv, pid, tid, flags )
 /*
  * __envreg_create_active_pid --
  *	Create array of pids, if need more room in array then double size.
- *	Only add active pids from DB_REGISTER file into array.
+ *	Only add active pids from DB_REGISTER file into array.  The caller's
+ *	own pid, my_pid, is also added to the array.
  */
 static int
 __envreg_create_active_pid(env, my_pid)
@@ -646,8 +670,7 @@ __envreg_create_active_pid(env, my_pid)
 	char buf[PID_LEN + 10];
 	int    ret;
 	off_t  pos;
-	pid_t  pid, *tmparray;
-	size_t tmpsize, nr;
+	size_t nr;
 	u_int lcnt;
 
 	dbenv = env->dbenv;
@@ -655,6 +678,15 @@ __envreg_create_active_pid(env, my_pid)
 	ret = 0;
 
 	/*
+	 * The process getting here has not been added to the DB_REGISTER
+	 * file yet, so include it as the first item in array
+	 */
+	if (env->num_active_pids == 0) {
+		if ((ret = __envreg_add_active_pid(env, my_pid)) != 0)
+			return (ret);
+	}
+
+	/*
 	 * Walk through DB_REGISTER file, we grab pid entries that are locked
 	 * as those represent processes that are still alive.   Ignore empty
 	 * slots, or those that are unlocked.
@@ -678,53 +710,50 @@ __envreg_create_active_pid(env, my_pid)
 			if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
 				return (ret);
 		} else {
-			/* first, check to make sure we have room in arrary */
-			if (DB_GLOBAL(num_active_pids) + 1 >
-			    DB_GLOBAL(size_active_pids)) {
-				tmpsize =
-				   DB_GLOBAL(size_active_pids) * sizeof(pid_t);
-
-				/* start with 512, then double if must grow */
-				tmpsize = tmpsize>0 ? tmpsize*2 : 512;
-				if ((ret = __os_malloc
-				    (env, tmpsize, &tmparray )) != 0)
-					return (ret);
-
-				/* if array exists, then copy and free */
-				if (DB_GLOBAL(active_pids)) {
-					memcpy( tmparray,
-					    DB_GLOBAL(active_pids),
-					    DB_GLOBAL(num_active_pids) *
-					    sizeof(pid_t));
-					__os_free( env, DB_GLOBAL(active_pids));
-				}
-
-				DB_GLOBAL(active_pids) = tmparray;
-				DB_GLOBAL(size_active_pids) = tmpsize;
-
-				/*
-				 * The process getting here has not been added
-				 * to the DB_REGISTER file yet, so include it
-				 * as the first item in array
-				 */
-				if (DB_GLOBAL(num_active_pids) == 0) {
-					pid = (pid_t)strtoul(my_pid, NULL, 10);
-					DB_GLOBAL(active_pids)
-					   [DB_GLOBAL(num_active_pids)++] = pid;
-				}
-			}
-
-			/* insert into array */
-			pid = (pid_t)strtoul(buf, NULL, 10);
-			DB_GLOBAL(active_pids)
-			    [DB_GLOBAL(num_active_pids)++] = pid;
-
+			if ((ret = __envreg_add_active_pid(env, buf)) != 0)
+				return (ret);
 		}
 
 	}
 
 	/* lets sort the array to allow for binary search in isalive func */
-	qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+	qsort(env->active_pids, env->num_active_pids,
 	    sizeof(pid_t), __envreg_pid_compare);
 	return (ret);
 }
+
+/*
+ * __envreg_add_active_pid --
+ *	Append a pid (decimal string) to env->active_pids, growing it if full.
+ */
+static int
+__envreg_add_active_pid(env, pid)
+	ENV *env;
+	char *pid;
+{
+	int ret;
+	size_t tmpsize;
+
+	ret = 0;
+
+	/* First, make sure there is room in the array for one more pid. */
+	if (env->num_active_pids + 1 >
+	    env->size_active_pids) {
+		tmpsize =
+		   env->size_active_pids * sizeof(pid_t);
+
+		/* Start with 512 bytes, then double the byte size to grow. */
+		tmpsize = tmpsize > 0 ? tmpsize * 2 : 512;
+		if ((ret = __os_realloc
+		    (env, tmpsize, &(env->active_pids) )) != 0)
+			return (ret);
+		/* tmpsize is in bytes; size_active_pids counts pid_t slots. */
+		env->size_active_pids = tmpsize / sizeof(pid_t);
+	}
+
+	/* Convert the base-10 string and store it in the next free slot. */
+	env->active_pids
+	    [env->num_active_pids++] = (pid_t)strtoul(pid, NULL, 10);
+
+	return (0);
+}
diff --git a/src/env/env_sig.c b/src/env/env_sig.c
index 6d127f85..57e64228 100644
--- a/src/env/env_sig.c
+++ b/src/env/env_sig.c
@@ -28,9 +28,9 @@
  * shared memory.
  */
 #ifdef HAVE_MIXED_SIZE_ADDRESSING
-#define	__STRUCTURE_COUNT	41
+#define	__STRUCTURE_COUNT	48
 #else
-#define	__STRUCTURE_COUNT	(41 + 104)
+#define	__STRUCTURE_COUNT	(48 + 108)
 #endif
 
 /*
@@ -66,7 +66,11 @@ __env_struct_sig()
 	__ADD(__db_h_stat);
 	__ADD(__db_heap_stat);
 	__ADD(__db_qam_stat);
+#ifdef	HAVE_MUTEX_SUPPORT
+	__ADD(__mutex_state);
+#endif
 	__ADD(__db_thread_info);
+	__ADD(__env_thread_info);
 	__ADD(__db_lockregion);
 	__ADD(__sh_dbt);
 	__ADD(__db_lockobj);
@@ -82,6 +86,9 @@ __env_struct_sig()
 	__ADD(__db_mutexregion);
 #endif
 #ifdef	HAVE_MUTEX_SUPPORT
+	__ADD(__mutex_history);
+#endif
+#ifdef	HAVE_MUTEX_SUPPORT
 	__ADD(__db_mutex_t);
 #endif
 	__ADD(__db_reg_env);
@@ -92,6 +99,10 @@ __env_struct_sig()
 
 #ifndef HAVE_MIXED_SIZE_ADDRESSING
 	__ADD(__db_dbt);
+#ifdef	HAVE_MUTEX_SUPPORT
+	__ADD(__db_event_mutex_died_info);
+#endif
+	__ADD(__db_event_failchk_info);
 	__ADD(__db_lockreq);
 	__ADD(__db_log_cursor);
 	__ADD(__log_rec_spec);
@@ -113,6 +124,7 @@ __env_struct_sig()
 	__ADD(__cq_fq);
 	__ADD(__cq_aq);
 	__ADD(__cq_jq);
+	__ADD(__db_stream);
 	__ADD(__db_heap_rid);
 	__ADD(__dbc);
 	__ADD(__key_range);
@@ -125,7 +137,6 @@ __env_struct_sig()
 	__ADD(__fn);
 	__ADD(__db_msgbuf);
 	__ADD(__pin_list);
-	__ADD(__env_thread_info);
 	__ADD(__flag_map);
 	__ADD(__db_backup_handle);
 	__ADD(__env);
diff --git a/src/env/env_stat.c b/src/env/env_stat.c
index 9bc3fe7e..094d0545 100644
--- a/src/env/env_stat.c
+++ b/src/env/env_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -21,11 +21,9 @@ static int   __env_print_dbenv_all __P((ENV *, u_int32_t));
 static int   __env_print_env_all __P((ENV *, u_int32_t));
 static int   __env_print_fh __P((ENV *));
 static int   __env_print_stats __P((ENV *, u_int32_t));
-static int   __env_print_thread __P((ENV *));
 static int   __env_stat_print __P((ENV *, u_int32_t));
 static char *__env_thread_state_print __P((DB_THREAD_STATE));
-static const char *
-	     __reg_type __P((reg_type_t));
+static const char * __reg_type __P((reg_type_t));
 
 /*
  * __env_stat_print_pp --
@@ -146,7 +144,6 @@ __env_stat_print(env, flags)
 /*
  * __env_print_stats --
  *	Display the default environment statistics.
- *
  */
 static int
 __env_print_stats(env, flags)
@@ -186,6 +183,10 @@ __env_print_stats(env, flags)
 	    (u_long)0, (u_long)0, (u_long)infop->rp->size);
 	__db_dlbytes(env, "Maximum region size",
 	    (u_long)0, (u_long)0, (u_long)infop->rp->max);
+	STAT_LONG("Process failure detected", renv->failure_panic);
+	if (renv->failure_symptom[0] != '\0')
+		__db_msg(env,
+		    "%s:\tFirst failure symptom", renv->failure_symptom);
 
 	return (0);
 }
@@ -267,8 +268,6 @@ __env_print_dbenv_all(env, flags)
 
 	__db_msg(env, "%s", DB_GLOBAL(db_line));
 	STAT_POINTER("ENV", dbenv->env);
-	__mutex_print_debug_single(
-	    env, "DB_ENV handle mutex", dbenv->mtx_db_env, flags);
 	STAT_ISSET("Errcall", dbenv->db_errcall);
 	STAT_ISSET("Errfile", dbenv->db_errfile);
 	STAT_STRING("Errpfx", dbenv->db_errpfx);
@@ -286,6 +285,7 @@ __env_print_dbenv_all(env, flags)
 	STAT_ISSET("ThreadId", dbenv->thread_id);
 	STAT_ISSET("ThreadIdString", dbenv->thread_id_string);
 
+	STAT_STRING("Blob dir", dbenv->db_blob_dir);
 	STAT_STRING("Log dir", dbenv->db_log_dir);
 	STAT_STRING("Metadata dir", dbenv->db_md_dir);
 	STAT_STRING("Tmp dir", dbenv->db_tmp_dir);
@@ -304,6 +304,8 @@ __env_print_dbenv_all(env, flags)
 
 	STAT_ISSET("Password", dbenv->passwd);
 
+	STAT_ULONG("Blob threshold", dbenv->blob_threshold);
+
 	STAT_ISSET("App private", dbenv->app_private);
 	STAT_ISSET("Api1 internal", dbenv->api1_internal);
 	STAT_ISSET("Api2 internal", dbenv->api2_internal);
@@ -314,6 +316,7 @@ __env_print_dbenv_all(env, flags)
 	STAT_ULONG("Mutex cnt", dbenv->mutex_cnt);
 	STAT_ULONG("Mutex inc", dbenv->mutex_inc);
 	STAT_ULONG("Mutex tas spins", dbenv->mutex_tas_spins);
+	STAT_LONG("Mutex failchk timeout", dbenv->mutex_failchk_timeout);
 
 	STAT_ISSET("Lock conflicts", dbenv->lk_conflicts);
 	STAT_LONG("Lock modes", dbenv->lk_modes);
@@ -356,6 +359,7 @@ __env_print_dbenv_all(env, flags)
 
 	__db_prflags(env,
 	    NULL, dbenv->flags, db_env_fn, NULL, "\tPublic environment flags");
+	COMPQUIET(flags, 0);
 
 	return (0);
 }
@@ -507,6 +511,8 @@ __env_thread_state_print(state)
 		return ("blocked and dead");
 	case THREAD_OUT:
 		return ("out");
+	case THREAD_VERIFY:
+		return ("verify");
 	default:
 		return ("unknown");
 	}
@@ -516,14 +522,17 @@ __env_thread_state_print(state)
 /*
  * __env_print_thread --
  *	Display the thread block state.
+ *
+ * PUBLIC: int __env_print_thread __P((ENV *));
  */
-static int
+int
 __env_print_thread(env)
 	ENV *env;
 {
 	BH *bhp;
 	DB_ENV *dbenv;
 	DB_HASHTAB *htab;
+	DB_LOCKER *locker;
 	DB_MPOOL *dbmp;
 	DB_THREAD_INFO *ip;
 	PIN_LIST *list, *lp;
@@ -532,6 +541,7 @@ __env_print_thread(env)
 	THREAD_INFO *thread;
 	u_int32_t i;
 	char buf[DB_THREADID_STRLEN];
+	char time_buf[CTIME_BUFLEN];
 
 	dbenv = env->dbenv;
 
@@ -561,6 +571,10 @@ __env_print_thread(env)
 			    dbenv->thread_id_string(
 			    dbenv, ip->dbth_pid, ip->dbth_tid, buf),
 			    __env_thread_state_print(ip->dbth_state));
+			if (timespecisset(&ip->dbth_failtime))
+				__db_msg(env, "Crashed at %s",
+				    __db_ctimespec(&ip->dbth_failtime,
+				    time_buf));
 			list = R_ADDR(env->reginfo, ip->dbth_pinlist);
 			for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) {
 				if (lp->b_ref == INVALID_ROFF)
@@ -570,6 +584,18 @@ __env_print_thread(env)
 				__db_msg(env,
 				     "\t\tpins: %lu", (u_long)bhp->pgno);
 			}
+			if (ip->dbth_local_locker != INVALID_ROFF) {
+				locker = (DB_LOCKER *)
+				    R_ADDR(&env->lk_handle->reginfo,
+				    ip->dbth_local_locker);
+				__db_msg(env, "\t\tcached locker %lx mtx %lu",
+					(u_long)locker->id,
+					(u_long)locker->mtx_locker);
+
+			}
+#ifdef HAVE_MUTEX_SUPPORT
+			(void)__mutex_record_print(env, ip);
+#endif
 		}
 	return (0);
 }
@@ -846,6 +872,7 @@ __reg_type(t)
 		return ("Transaction");
 	case INVALID_REGION_TYPE:
 		return ("Invalid");
+	/*lint -e{787} */
 	}
 	return ("Unknown");
 }
diff --git a/src/fileops/fileops.src b/src/fileops/fileops.src
index cdb6af27..3cb874b7 100644
--- a/src/fileops/fileops.src
+++ b/src/fileops/fileops.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -30,7 +30,14 @@ ARG	appname		u_int32_t	lu
 ARG	mode		u_int32_t	o
 END
 
-BEGIN create		48	143
+BEGIN_COMPAT create		60	143
+DBT	name		DBT		s
+DBT	dirname		DBT		s
+ARG	appname		u_int32_t	lu
+ARG	mode		u_int32_t	o
+END
+
+BEGIN create		60p1	143
 DBT	name		DBT		s
 DBT	dirname		DBT		s
 ARG	appname		u_int32_t	lu
@@ -43,7 +50,13 @@ END
  * name: name in the file system
  * appname: indicates if the name needs to go through __db_appname
  */
-BEGIN remove		42	144
+BEGIN_COMPAT remove		60	144
+DBT	name		DBT		s
+DBT	fid		DBT		s
+ARG	appname		u_int32_t	lu
+END
+
+BEGIN remove		60p1	144
 DBT	name		DBT		s
 DBT	fid		DBT		s
 ARG	appname		u_int32_t	lu
@@ -71,7 +84,18 @@ DBT	page		DBT		s
 ARG	flag		u_int32_t	lu
 END
 
-BEGIN write	48	145
+BEGIN_COMPAT write	60	145
+DBT	name		DBT		s
+DBT	dirname		DBT		s
+ARG	appname		u_int32_t	lu
+ARG	pgsize		u_int32_t	lu
+ARG	pageno		db_pgno_t	lu
+ARG	offset		u_int32_t	lu
+DBT	page		DBT		s
+ARG	flag		u_int32_t	lu
+END
+
+BEGIN write	60p1	145
 DBT	name		DBT		s
 DBT	dirname		DBT		s
 ARG	appname		u_int32_t	lu
@@ -83,6 +107,42 @@ ARG	flag		u_int32_t	lu
 END
 
 /*
+ * write_file: log the writing of data into a file.
+ *
+ * name: file containing the data.
+ * appname: indicates if the name needs to go through __db_appname
+ * offset_lo/offset_hi: (60 format) low/high 32 bits of the file offset.
+ * offset: (60p1 format) offset in the file, as a single 64 bit integer.
+ * old_data: Data being overwritten, if there is any
+ * new_data: Data being written to the file.
+ * flag: DB_FOP_APPEND (0x00000001), DB_FOP_CREATE (0x00000002) and
+ *  DB_FOP_REDO (0x00000008).  Used to tell how the operation can be
+ *  undone, truncating in the case of append and deleting the file in
+ *  the case of create, and whether enough information was logged so
+ *  that the operation can be redone.
+ */
+BEGIN_COMPAT write_file	60	86
+DBT	name		DBT		s
+DBT	dirname		DBT		s
+ARG	appname		u_int32_t	lu
+ARG	offset_lo	u_int32_t	lu
+ARG	offset_hi	u_int32_t	lu
+DBT	old_data	DBT		s
+DBT	new_data	DBT		s
+ARG	flag		u_int32_t	lu
+END
+
+BEGIN write_file	60p1	86
+DBT	name		DBT		s
+DBT	dirname		DBT		s
+ARG	appname		u_int32_t	lu
+LONGARG	offset		u_int64_t	llu
+DBT	old_data	DBT		s
+DBT	new_data	DBT		s
+ARG	flag		u_int32_t	lu
+END
+
+/*
  * rename: move a file from one name to another.
  * The appname value indicates if this is a path name that should be used
  * directly (i.e., no interpretation) or if it is a pathname that should
@@ -105,8 +165,17 @@ DBT	fileid		DBT		s
 ARG	appname		u_int32_t	lu
 END
 
-BEGIN rename	48	146
-DUPLICATE rename_noundo	46	150
+BEGIN_COMPAT rename	60	146
+DUPLICATE rename_noundo 60	150
+DBT	oldname		DBT		s
+DBT	newname		DBT		s
+DBT	dirname		DBT		s
+DBT	fileid		DBT		s
+ARG	appname		u_int32_t	lu
+END
+
+BEGIN rename	60p1	146
+DUPLICATE rename_noundo	60p1	150
 DBT	oldname		DBT		s
 DBT	newname		DBT		s
 DBT	dirname		DBT		s
@@ -128,7 +197,15 @@ END
  * child:	The transaction that removed or renamed the file.
  */
  */
-BEGIN file_remove	42	141
+BEGIN_COMPAT file_remove	60	141
+DBT	real_fid	DBT	s
+DBT	tmp_fid		DBT	s
+DBT	name		DBT	s
+ARG	appname		u_int32_t	lu
+ARG	child		u_int32_t	lx
+END
+
+BEGIN file_remove	60p1	141
 DBT	real_fid	DBT	s
 DBT	tmp_fid		DBT	s
 DBT	name		DBT	s
diff --git a/src/fileops/fileops_auto.c b/src/fileops/fileops_auto.c
index 0db619a5..eff1377b 100644
--- a/src/fileops/fileops_auto.c
+++ b/src/fileops/fileops_auto.c
@@ -14,6 +14,13 @@ DB_LOG_RECSPEC __fop_create_42_desc[] = {
 	{LOGREC_ARG, SSZ(__fop_create_42_args, mode), "mode", "%o"},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __fop_create_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_create_60_args, name), "name", ""},
+	{LOGREC_DBT, SSZ(__fop_create_60_args, dirname), "dirname", ""},
+	{LOGREC_ARG, SSZ(__fop_create_60_args, appname), "appname", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_create_60_args, mode), "mode", "%o"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __fop_create_desc[] = {
 	{LOGREC_DBT, SSZ(__fop_create_args, name), "name", ""},
 	{LOGREC_DBT, SSZ(__fop_create_args, dirname), "dirname", ""},
@@ -21,6 +28,12 @@ DB_LOG_RECSPEC __fop_create_desc[] = {
 	{LOGREC_ARG, SSZ(__fop_create_args, mode), "mode", "%o"},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __fop_remove_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_remove_60_args, name), "name", ""},
+	{LOGREC_DBT, SSZ(__fop_remove_60_args, fid), "fid", ""},
+	{LOGREC_ARG, SSZ(__fop_remove_60_args, appname), "appname", "%lu"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __fop_remove_desc[] = {
 	{LOGREC_DBT, SSZ(__fop_remove_args, name), "name", ""},
 	{LOGREC_DBT, SSZ(__fop_remove_args, fid), "fid", ""},
@@ -37,6 +50,17 @@ DB_LOG_RECSPEC __fop_write_42_desc[] = {
 	{LOGREC_ARG, SSZ(__fop_write_42_args, flag), "flag", "%lu"},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __fop_write_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_write_60_args, name), "name", ""},
+	{LOGREC_DBT, SSZ(__fop_write_60_args, dirname), "dirname", ""},
+	{LOGREC_ARG, SSZ(__fop_write_60_args, appname), "appname", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_write_60_args, pgsize), "pgsize", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_write_60_args, pageno), "pageno", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_write_60_args, offset), "offset", "%lu"},
+	{LOGREC_DBT, SSZ(__fop_write_60_args, page), "page", ""},
+	{LOGREC_ARG, SSZ(__fop_write_60_args, flag), "flag", "%lu"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __fop_write_desc[] = {
 	{LOGREC_DBT, SSZ(__fop_write_args, name), "name", ""},
 	{LOGREC_DBT, SSZ(__fop_write_args, dirname), "dirname", ""},
@@ -48,6 +72,27 @@ DB_LOG_RECSPEC __fop_write_desc[] = {
 	{LOGREC_ARG, SSZ(__fop_write_args, flag), "flag", "%lu"},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __fop_write_file_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_write_file_60_args, name), "name", ""},
+	{LOGREC_DBT, SSZ(__fop_write_file_60_args, dirname), "dirname", ""},
+	{LOGREC_ARG, SSZ(__fop_write_file_60_args, appname), "appname", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_write_file_60_args, offset_lo), "offset_lo", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_write_file_60_args, offset_hi), "offset_hi", "%lu"},
+	{LOGREC_DBT, SSZ(__fop_write_file_60_args, old_data), "old_data", ""},
+	{LOGREC_DBT, SSZ(__fop_write_file_60_args, new_data), "new_data", ""},
+	{LOGREC_ARG, SSZ(__fop_write_file_60_args, flag), "flag", "%lu"},
+	{LOGREC_Done, 0, "", ""}
+};
+DB_LOG_RECSPEC __fop_write_file_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_write_file_args, name), "name", ""},
+	{LOGREC_DBT, SSZ(__fop_write_file_args, dirname), "dirname", ""},
+	{LOGREC_ARG, SSZ(__fop_write_file_args, appname), "appname", "%lu"},
+	{LOGREC_LONGARG, SSZ(__fop_write_file_args, offset), "offset", ""},
+	{LOGREC_DBT, SSZ(__fop_write_file_args, old_data), "old_data", ""},
+	{LOGREC_DBT, SSZ(__fop_write_file_args, new_data), "new_data", ""},
+	{LOGREC_ARG, SSZ(__fop_write_file_args, flag), "flag", "%lu"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __fop_rename_42_desc[] = {
 	{LOGREC_DBT, SSZ(__fop_rename_42_args, oldname), "oldname", ""},
 	{LOGREC_DBT, SSZ(__fop_rename_42_args, newname), "newname", ""},
@@ -62,6 +107,22 @@ DB_LOG_RECSPEC __fop_rename_noundo_46_desc[] = {
 	{LOGREC_ARG, SSZ(__fop_rename_42_args, appname), "appname", "%lu"},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __fop_rename_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, oldname), "oldname", ""},
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, newname), "newname", ""},
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, dirname), "dirname", ""},
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, fileid), "fileid", ""},
+	{LOGREC_ARG, SSZ(__fop_rename_60_args, appname), "appname", "%lu"},
+	{LOGREC_Done, 0, "", ""}
+};
+DB_LOG_RECSPEC __fop_rename_noundo_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, oldname), "oldname", ""},
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, newname), "newname", ""},
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, dirname), "dirname", ""},
+	{LOGREC_DBT, SSZ(__fop_rename_60_args, fileid), "fileid", ""},
+	{LOGREC_ARG, SSZ(__fop_rename_60_args, appname), "appname", "%lu"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __fop_rename_desc[] = {
 	{LOGREC_DBT, SSZ(__fop_rename_args, oldname), "oldname", ""},
 	{LOGREC_DBT, SSZ(__fop_rename_args, newname), "newname", ""},
@@ -78,6 +139,14 @@ DB_LOG_RECSPEC __fop_rename_noundo_desc[] = {
 	{LOGREC_ARG, SSZ(__fop_rename_args, appname), "appname", "%lu"},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __fop_file_remove_60_desc[] = {
+	{LOGREC_DBT, SSZ(__fop_file_remove_60_args, real_fid), "real_fid", ""},
+	{LOGREC_DBT, SSZ(__fop_file_remove_60_args, tmp_fid), "tmp_fid", ""},
+	{LOGREC_DBT, SSZ(__fop_file_remove_60_args, name), "name", ""},
+	{LOGREC_ARG, SSZ(__fop_file_remove_60_args, appname), "appname", "%lu"},
+	{LOGREC_ARG, SSZ(__fop_file_remove_60_args, child), "child", "%lx"},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __fop_file_remove_desc[] = {
 	{LOGREC_DBT, SSZ(__fop_file_remove_args, real_fid), "real_fid", ""},
 	{LOGREC_DBT, SSZ(__fop_file_remove_args, tmp_fid), "tmp_fid", ""},
@@ -106,6 +175,9 @@ __fop_init_recover(env, dtabp)
 	    __fop_write_recover, DB___fop_write)) != 0)
 		return (ret);
 	if ((ret = __db_add_recovery_int(env, dtabp,
+	    __fop_write_file_recover, DB___fop_write_file)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, dtabp,
 	    __fop_rename_recover, DB___fop_rename)) != 0)
 		return (ret);
 	if ((ret = __db_add_recovery_int(env, dtabp,
diff --git a/src/fileops/fileops_autop.c b/src/fileops/fileops_autop.c
index 6e271a17..784aa1d0 100644
--- a/src/fileops/fileops_autop.c
+++ b/src/fileops/fileops_autop.c
@@ -27,6 +27,23 @@ __fop_create_42_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __fop_create_60_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_create_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_create_60", __fop_create_60_desc, info));
+}
+
+/*
  * PUBLIC: int __fop_create_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -44,6 +61,23 @@ __fop_create_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __fop_remove_60_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_remove_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_remove_60", __fop_remove_60_desc, info));
+}
+
+/*
  * PUBLIC: int __fop_remove_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -78,6 +112,23 @@ __fop_write_42_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __fop_write_60_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_write_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_write_60", __fop_write_60_desc, info));
+}
+
+/*
  * PUBLIC: int __fop_write_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -95,6 +146,40 @@ __fop_write_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __fop_write_file_60_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_write_file_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_write_file_60", __fop_write_file_60_desc, info));
+}
+
+/*
+ * PUBLIC: int __fop_write_file_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_write_file_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_write_file", __fop_write_file_desc, info));
+}
+
+/*
  * PUBLIC: int __fop_rename_42_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -112,6 +197,23 @@ __fop_rename_42_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __fop_rename_60_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_rename_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_rename_60", __fop_rename_60_desc, info));
+}
+
+/*
  * PUBLIC: int __fop_rename_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -129,6 +231,23 @@ __fop_rename_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __fop_file_remove_60_print __P((ENV *, DBT *,
+ * PUBLIC:     DB_LSN *, db_recops, void *));
+ */
+int
+__fop_file_remove_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__fop_file_remove_60", __fop_file_remove_60_desc, info));
+}
+
+/*
  * PUBLIC: int __fop_file_remove_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -165,6 +284,9 @@ __fop_init_print(env, dtabp)
 	    __fop_write_print, DB___fop_write)) != 0)
 		return (ret);
 	if ((ret = __db_add_recovery_int(env, dtabp,
+	    __fop_write_file_print, DB___fop_write_file)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, dtabp,
 	    __fop_rename_print, DB___fop_rename)) != 0)
 		return (ret);
 	if ((ret = __db_add_recovery_int(env, dtabp,
diff --git a/src/fileops/fop_basic.c b/src/fileops/fop_basic.c
index d6c707f2..c1280d76 100644
--- a/src/fileops/fop_basic.c
+++ b/src/fileops/fop_basic.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -253,6 +253,220 @@ err:	if (local_open &&
 }
 
 /*
+ * Used to reduce the maximum amount of data that will be logged at a time.
+ * Large writes are logged as a series of smaller writes to prevent a
+ * single log record from being larger than the log buffer or a log file.
+ */
+#define	LOG_OVERWRITE_MULTIPLIER 0.75
+#define	LOG_REDO_MULTIPLIER 0.75
+#define	LOG_OVERWRITE_REDO_MULTIPLIER 0.33
+
+/*
+ * __fop_write_file
+ *
+ * Write "size" bytes from "buf" to file "name" beginning at offset "off".
+ * dirname is the directory holding the file, fhp the file handle to write
+ * to (NULL to open the file here), and flags says whether data is being
+ * created or appended, which changes how the write is logged.  Unlike
+ * __fop_write, which writes database pages, this writes generic data,
+ * usually to blob files.  Returns 0, or non-zero on error (EIO if short).
+ *
+ * PUBLIC: int __fop_write_file __P((ENV *, DB_TXN *,
+ * PUBLIC:     const char *, const char *, APPNAME, DB_FH *,
+ * PUBLIC:     off_t, void *, size_t, u_int32_t));
+ */
+int
+__fop_write_file(env, txn,
+    name, dirname, appname, fhp, off, buf, size, flags)
+	ENV *env;
+	DB_TXN *txn;
+	const char *name, *dirname;
+	APPNAME appname;
+	DB_FH *fhp;
+	off_t off;
+	void *buf;
+	size_t size;
+	u_int32_t flags;
+{
+	DBT new_data, old_data, namedbt, dirdbt;
+	DB_LOG *dblp;
+	DB_LSN lsn;
+	off_t cur_off;
+	int local_open, ret, t_ret;
+	size_t cur_size, nbytes, tmp_size;
+	u_int32_t lflags, lgbuf_size, lgsize, lgfile_size;
+	char *real_name;
+	void *cur_ptr;
+
+	ret = local_open = 0;
+	real_name = NULL;
+	lflags = 0;
+	memset(&new_data, 0, sizeof(new_data));
+	memset(&old_data, 0, sizeof(old_data));
+	ZERO_LSN(lsn);
+
+	if (fhp == NULL) {
+		/* File isn't open; we need to reopen it. */
+		if ((ret = __db_appname(env,
+		    appname, name, &dirname, &real_name)) != 0)
+			return (ret);
+
+		if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0)
+			goto err;
+		local_open = 1;
+	}
+
+	if (DBENV_LOGGING(env)
+#if !defined(DEBUG_WOP)
+	    && txn != NULL
+#endif
+	    ) {
+		DB_INIT_DBT(namedbt, name, strlen(name) + 1);
+		if (dirname != NULL)
+			DB_INIT_DBT(dirdbt, dirname, strlen(dirname) + 1);
+		else
+			memset(&dirdbt, 0, sizeof(dirdbt));
+		/*
+		 * If the write is larger than the log buffer or file size,
+		 * then log it as a set of smaller writes.
+		 */
+		cur_off = off;
+		cur_ptr = buf;
+		cur_size = size;
+		dblp = env->lg_handle;
+		LOG_SYSTEM_LOCK(env);
+		lgfile_size = ((LOG *)dblp->reginfo.primary)->log_nsize;
+		LOG_SYSTEM_UNLOCK(env);
+		if ((ret = __log_get_lg_bsize(env->dbenv, &lgbuf_size)) != 0)
+			goto err;
+
+		if (lgfile_size > lgbuf_size)
+			lgsize = lgbuf_size;
+		else
+			lgsize = lgfile_size;
+
+		/*
+		 * Partial logging only logs enough data to undo an operation.
+		 */
+		if (LF_ISSET(DB_FOP_PARTIAL_LOG)) {
+			/* No data needs to be logged for append and create. */
+			if (LF_ISSET(DB_FOP_APPEND | DB_FOP_CREATE)) {
+				lflags |=
+				    flags & (DB_FOP_APPEND | DB_FOP_CREATE);
+				cur_size = 0;
+				goto log;
+			} else {
+				/*
+				 * Writing in the middle of the blob requires
+				 * logging the data being overwritten.
+				 */
+				lgsize = (u_int32_t)
+				    (lgsize * LOG_OVERWRITE_MULTIPLIER);
+			}
+		} else {
+			/* Log that the operation can be redone from logs. */
+			lflags |= DB_FOP_REDO;
+			/* Just log the new data for append and create */
+			if (LF_ISSET(DB_FOP_APPEND | DB_FOP_CREATE)) {
+				lgsize = (u_int32_t)
+				    (lgsize * LOG_REDO_MULTIPLIER);
+				lflags |= flags &
+				    (DB_FOP_APPEND | DB_FOP_CREATE);
+			} else {
+				/*
+				 * Writing in the middle of the blob requires
+				 * logging both the old and new data.
+				 */
+				lgsize = (u_int32_t)
+				    (lgsize * LOG_OVERWRITE_REDO_MULTIPLIER);
+			}
+		}
+
+		while (cur_size > 0) {
+			new_data.data = cur_ptr;
+			if (cur_size > lgsize) {
+				new_data.size = lgsize;
+				cur_size -= lgsize;
+			} else {
+				new_data.size = (u_int32_t)cur_size;
+				cur_size = 0;
+			}
+			cur_ptr = (unsigned char *)cur_ptr + new_data.size;
+			/*
+			 * If not creating or appending the file, then
+			 * the data being overwritten needs to be read
+			 * in so it can be written back in on abort.
+			 */
+			if (!(lflags & (DB_FOP_CREATE | DB_FOP_APPEND))) {
+				DB_ASSERT(env, old_data.data == NULL ||
+				    new_data.size <= old_data.size);
+				old_data.size = new_data.size;
+				if (old_data.data == NULL) {
+					if ((ret = __os_malloc(env,
+					    old_data.size,
+					    &old_data.data)) != 0)
+						goto err;
+				}
+				if ((ret = __os_seek(
+				    env, fhp, 0, 0, cur_off)) != 0)
+					goto err;
+				if ((ret = __os_read(env, fhp, old_data.data,
+				    old_data.size, &nbytes)) != 0)
+					goto err;
+			}
+log:			tmp_size = new_data.size;
+			/*
+			 * No need to log the new data if this operation
+			 * cannot be redone from logs.
+			 */
+			if (!(lflags & DB_FOP_REDO))
+				memset(&new_data, 0, sizeof(new_data));
+			if ((ret = __fop_write_file_log(
+			    env, txn, &lsn, flags, &namedbt, &dirdbt,
+			    (u_int32_t)appname, (u_int64_t)cur_off,
+			    &old_data, &new_data, lflags)) != 0)
+				goto err;
+			cur_off += tmp_size;
+		}
+		/*
+		 * If not creating, we have to flush the logs so that they
+		 * will be available to undo internal writes and appends in case
+		 * of a crash.
+		 */
+		if (!(LF_ISSET(DB_FOP_CREATE)) &&
+		    txn != NULL && !F_ISSET(txn, TXN_NOSYNC))
+			if ((ret = __log_flush(env, &lsn)) != 0)
+				goto err;
+	}
+
+	/* Seek to offset. */
+	if ((ret = __os_seek(env, fhp, 0, 0, off)) != 0)
+		goto err;
+
+	/* Now do the write. */
+	if ((ret = __os_write(env, fhp, buf, size, &nbytes)) != 0)
+		goto err;
+
+	if (nbytes != size) {
+		__db_errx(env, DB_STR_A("0238",
+		    "Error wrote %lld bytes to file %s instead of %lld .",
+		    "%lld %s %lld"),
+		    (long long)nbytes, name, (long long)size);
+		ret = EIO;	/* A short write must not report success. */
+	}
+
+err:	if (local_open &&
+	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
+			ret = t_ret;
+
+	if (real_name != NULL)
+		__os_free(env, real_name);
+	if (old_data.data != NULL)
+		__os_free(env, old_data.data);
+	return (ret);
+}
+
+/*
  * __fop_rename --
  *	Change a file's name.
  *
diff --git a/src/fileops/fop_rec.c b/src/fileops/fop_rec.c
index 52d6175d..71a81ad6 100644
--- a/src/fileops/fop_rec.c
+++ b/src/fileops/fop_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,16 +9,63 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/fop.h"
 #include "dbinc/db_am.h"
 #include "dbinc/mp.h"
 #include "dbinc/txn.h"
 
+typedef enum {
+	DB_APP53_NONE=0,		/* No type (region). */
+	DB_APP53_DATA,			/* Data file. */
+	DB_APP53_LOG,			/* Log file. */
+	DB_APP53_META,			/* Persistent metadata file. */
+	DB_APP53_RECOVER,		/* We are in recovery. */
+	DB_APP53_TMP			/* Temporary file. */
+} APPNAME53;
+
+static APPNAME __fop_convert_appname __P((ENV *, APPNAME53));
+static int __fop_create_recover_int __P((ENV *, char *, db_recops, int));
 static int __fop_rename_recover_int
     __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int));
+static int __fop_rename_60_recover_int
+    __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int));
 static int __fop_rename_42_recover_int
     __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int));
+static int __fop_write_file_recover_int
+    __P((ENV *, db_recops,
+    APPNAME, u_int32_t, DBT *, DBT *, DBT *, DBT *, off_t, DB_TXN *));
+
+/*
+ * The APPNAME enumeration was changed in 6.0 to include DB_APP_BLOB.  APPNAME
+ * is used by the log records __fop_create, __fop_write, and __fop_rename.
+ * __fop_write_file also includes an APPNAME field, but that record was created
+ * in 6.0.
+ */
+static APPNAME
+__fop_convert_appname(env, appname)
+	ENV *env;
+	APPNAME53 appname;
+{
+	switch(appname)
+	{
+		case DB_APP53_NONE:
+			return (DB_APP_NONE);
+		case DB_APP53_DATA:
+			return (DB_APP_DATA);
+		case DB_APP53_LOG:
+			return (DB_APP_LOG);
+		case DB_APP53_META:
+			return (DB_APP_META);
+		case DB_APP53_RECOVER:
+			return (DB_APP_RECOVER);
+		case DB_APP53_TMP:
+			return (DB_APP_TMP);
+	}
+	DB_ASSERT(env, 0);
+	return (DB_APP_NONE);
+}
 
 /*
  * The transactional guarantees Berkeley DB provides for file
@@ -50,6 +97,85 @@ static int __fop_rename_42_recover_int
  * it does not apply.
  */
 
+static int
+__fop_create_recover_int(env, real_name, op, mode)
+	ENV *env;
+	char *real_name;
+	db_recops op;
+	int mode;
+{
+	DB_FH *fhp;
+	DBMETA *meta;
+	u_int8_t mbuf[DBMETASIZE];
+	int ret;
+	char *path;
+#ifdef	HAVE_REPLICATION
+	DELAYED_BLOB_LIST *dbl;
+	int view_partial;
+
+	dbl = NULL;
+#endif
+	meta = (DBMETA *)mbuf;
+	ret = 0;
+
+	if (DB_UNDO(op)) {
+		/*
+		 * If the file was opened in mpool, we must mark it as
+		 * dead via nameop which will also unlink the file.
+		 */
+		if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) {
+			if (__fop_read_meta(env,
+			    real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 &&
+			    __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0) {
+				/* No goto on error: fhp is closed below. */
+				ret = __memp_nameop(env,
+				    meta->uid, NULL, real_name, NULL, 0);
+			} else {
+				(void)__os_closehandle(env, fhp);
+				goto do_unlink;
+			}
+			(void)__os_closehandle(env, fhp);
+		} else
+do_unlink:		(void)__os_unlink(env, real_name, 0);
+	} else if (DB_REDO(op)) {
+		path = real_name;
+#ifdef DB_WIN32
+		/*
+		 * Absolute paths on windows can result in it creating a
+		 * "C" or "D" directory in the working directory.
+		 */
+		if (__os_abspath(real_name))
+			path += 2;
+#endif
+
+#ifdef	HAVE_REPLICATION
+		/*
+		 * Prevent replication of blob files if their owning database
+		 * is not replicated.
+		 */
+		if (IS_VIEW_SITE(env) && IS_BLOB_FILE(path)) {
+			if ((ret = __rep_call_partial(env,
+			    path, &view_partial, 0, &dbl)) != 0)
+				goto out;
+			DB_ASSERT(env, dbl == NULL);
+			if (view_partial == 0)
+				goto out;
+		}
+#endif
+		/* Blob directories might not exist yet. */
+		if (__os_exists(env, real_name, NULL) != 0 &&
+		    (ret = __db_mkpath(env, path)) != 0)
+			goto out;
+
+		if ((ret = __os_open(env, real_name,
+		    0, DB_OSO_CREATE, mode, &fhp)) == 0)
+			(void)__os_closehandle(env, fhp);
+		else
+			goto out;
+	}
+out:	return (ret);
+}
+
 /*
  * __fop_create_recover --
  *	Recovery function for create.
@@ -66,9 +192,6 @@ __fop_create_recover(env, dbtp, lsnp, op, info)
 	void *info;
 {
 	__fop_create_args *argp;
-	DB_FH *fhp;
-	DBMETA *meta;
-	u_int8_t mbuf[DBMETASIZE];
 	int ret;
 	char *real_name;
 	const char *dirname;
@@ -78,7 +201,6 @@ __fop_create_recover(env, dbtp, lsnp, op, info)
 	real_name = NULL;
 	REC_PRINT(__fop_create_print);
 	REC_NOOP_INTRO(__fop_create_read);
-	meta = (DBMETA *)mbuf;
 
 	if (argp->dirname.size == 0)
 		dirname = NULL;
@@ -90,32 +212,60 @@ __fop_create_recover(env, dbtp, lsnp, op, info)
 	    (const char *)argp->name.data, &dirname, &real_name)) != 0)
 		goto out;
 
-	if (DB_UNDO(op)) {
-		/*
-		 * If the file was opened in mpool, we must mark it as
-		 * dead via nameop which will also unlink the file.
-		 */
-		if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) {
-			if (__fop_read_meta(env,
-			    real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 &&
-			    __db_chk_meta(env, NULL, meta, 1) == 0) {
-				if ((ret = __memp_nameop(env,
-				    meta->uid, NULL, real_name, NULL, 0)) != 0)
-					goto out;
-			} else {
-				(void)__os_closehandle(env, fhp);
-				goto do_unlink;
-			}
-			(void)__os_closehandle(env, fhp);
-		} else
-do_unlink:		(void)__os_unlink(env, real_name, 0);
-	} else if (DB_REDO(op)) {
-		if ((ret = __os_open(env, real_name, 0,
-		    DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0)
-			(void)__os_closehandle(env, fhp);
-		else
-			goto out;
-	}
+	if ((ret = __fop_create_recover_int(
+	    env, real_name, op, (int)argp->mode)) != 0)
+		goto out;
+
+	*lsnp = argp->prev_lsn;
+
+out: if (real_name != NULL)
+		__os_free(env, real_name);
+
+	REC_NOOP_CLOSE;
+}
+
+/*
+ * __fop_create_60_recover --
+ *	Recovery function for create.
+ *
+ * PUBLIC: int __fop_create_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__fop_create_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__fop_create_60_args *argp;
+	APPNAME appname;
+	int ret;
+	char *real_name;
+	const char *dirname;
+
+	COMPQUIET(info, NULL);
+
+	real_name = NULL;
+	REC_PRINT(__fop_create_60_print);
+	REC_NOOP_INTRO(__fop_create_60_read);
+
+	if (argp->dirname.size == 0)
+		dirname = NULL;
+	else
+		dirname = (const char *)argp->dirname.data;
+
+	appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
+
+	if ((ret = __db_appname(env,
+	    appname == DB_APP_DATA ? DB_APP_RECOVER : appname,
+	    (const char *)argp->name.data, &dirname, &real_name)) != 0)
+		goto out;
+
+	if ((ret = __fop_create_recover_int(
+	    env, real_name, op, (int)argp->mode)) != 0)
+		goto out;
 
 	*lsnp = argp->prev_lsn;
 
@@ -144,6 +294,7 @@ __fop_create_42_recover(env, dbtp, lsnp, op, info)
 	DB_FH *fhp;
 	DBMETA *meta;
 	u_int8_t mbuf[DBMETASIZE];
+	APPNAME appname;
 	int ret;
 	char *real_name;
 
@@ -153,8 +304,9 @@ __fop_create_42_recover(env, dbtp, lsnp, op, info)
 	REC_PRINT(__fop_create_print);
 	REC_NOOP_INTRO(__fop_create_read);
 	meta = (DBMETA *)mbuf;
+	appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
 
-	if ((ret = __db_appname(env, (APPNAME)argp->appname,
+	if ((ret = __db_appname(env, appname,
 	    (const char *)argp->name.data, NULL, &real_name)) != 0)
 		goto out;
 
@@ -166,7 +318,7 @@ __fop_create_42_recover(env, dbtp, lsnp, op, info)
 		if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) {
 			if (__fop_read_meta(env,
 			    real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 &&
-			    __db_chk_meta(env, NULL, meta, 1) == 0) {
+			    __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0) {
 				if ((ret = __memp_nameop(env,
 				    meta->uid, NULL, real_name, NULL, 0)) != 0)
 					goto out;
@@ -232,6 +384,49 @@ out:	if (real_name != NULL)
 }
 
 /*
+ * __fop_remove_60_recover --
+ *	Recovery function for remove.
+ *
+ * PUBLIC: int __fop_remove_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__fop_remove_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__fop_remove_60_args *argp;
+	APPNAME appname;
+	int ret;
+	char *real_name;
+
+	COMPQUIET(info, NULL);
+
+	real_name = NULL;
+	REC_PRINT(__fop_remove_60_print);
+	REC_NOOP_INTRO(__fop_remove_60_read);
+
+	appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
+
+	if ((ret = __db_appname(env, appname,
+	    (const char *)argp->name.data, NULL, &real_name)) != 0)
+		goto out;
+
+	/* It's ok if the file is not there; the result is ignored. */
+	if (DB_REDO(op))
+		(void)__memp_nameop(env,
+		    (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0);
+
+	*lsnp = argp->prev_lsn;
+out:	if (real_name != NULL)
+		__os_free(env, real_name);
+	REC_NOOP_CLOSE;
+}
+
+/*
  * __fop_write_recover --
  *	Recovery function for writechunk.
  *
@@ -251,6 +446,15 @@ __fop_write_recover(env, dbtp, lsnp, op, info)
 
 	COMPQUIET(info, NULL);
 
+#ifndef HAVE_64BIT_TYPES
+	COMPQUIET(dbtp, NULL);
+	COMPQUIET(lsnp, NULL);
+	COMPQUIET(op, 0);
+	__db_errx(env, DB_STR("0243",
+	    "Blobs require 64 integer compiler support."));
+	return (DB_OPNOTSUP);
+#endif
+
 	REC_PRINT(__fop_write_print);
 	REC_NOOP_INTRO(__fop_write_read);
 
@@ -272,6 +476,48 @@ __fop_write_recover(env, dbtp, lsnp, op, info)
 }
 
 /*
+ * __fop_write_60_recover --
+ *	Recovery function for writechunk.
+ *
+ * PUBLIC: int __fop_write_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__fop_write_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__fop_write_60_args *argp;
+	APPNAME appname;
+	int ret;
+
+	COMPQUIET(info, NULL);
+
+	REC_PRINT(__fop_write_60_print);
+	REC_NOOP_INTRO(__fop_write_60_read);
+
+	ret = 0;
+	if (DB_UNDO(op))
+		DB_ASSERT(env, argp->flag != 0);
+	else if (DB_REDO(op)) {
+		appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
+		ret = __fop_write(env,
+		    argp->txnp, argp->name.data,
+		    argp->dirname.size == 0 ? NULL : argp->dirname.data,
+		    appname == DB_APP_DATA ? DB_APP_RECOVER : appname,
+		    NULL, argp->pgsize, argp->pageno, argp->offset,
+		    argp->page.data, argp->page.size, argp->flag, 0);
+	}
+
+	if (ret == 0)
+		*lsnp = argp->prev_lsn;
+	REC_NOOP_CLOSE;
+}
+
+/*
  * __fop_write_42_recover --
  *	Recovery function for writechunk.
  *
@@ -287,6 +533,7 @@ __fop_write_42_recover(env, dbtp, lsnp, op, info)
 	void *info;
 {
 	__fop_write_args *argp;
+	APPNAME appname;
 	int ret;
 
 	COMPQUIET(info, NULL);
@@ -297,18 +544,194 @@ __fop_write_42_recover(env, dbtp, lsnp, op, info)
 	ret = 0;
 	if (DB_UNDO(op))
 		DB_ASSERT(env, argp->flag != 0);
-	else if (DB_REDO(op))
+	else if (DB_REDO(op)) {
+		appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
 		ret = __fop_write(env,
-		    argp->txnp, argp->name.data, NULL, (APPNAME)argp->appname,
+		    argp->txnp, argp->name.data, NULL, appname,
 		    NULL, argp->pgsize, argp->pageno, argp->offset,
 		    argp->page.data, argp->page.size, argp->flag, 0);
+	}
+
+	if (ret == 0)
+		*lsnp = argp->prev_lsn;
+	REC_NOOP_CLOSE;
+}
+
+static int
+__fop_write_file_recover_int(
+    env, op, appname, flag, dirname, name, new_data, old_data, offset, txn)
+	ENV *env;
+	db_recops op;
+	APPNAME appname;
+	u_int32_t flag;
+	DBT *dirname;
+	DBT *name;
+	DBT *new_data;
+	DBT *old_data;
+	off_t offset;
+	DB_TXN *txn;
+{
+	DB_FH *fhp;
+	int ret;
+	size_t nbytes;
+	char *path;
+
+	fhp = NULL;
+	path = NULL;
+	ret = 0;
+
+	if (DB_UNDO(op)) {
+		if (flag & DB_FOP_CREATE) {
+			/*
+			 * File was created in this transaction. Do nothing,
+			 * destroying the file will undo the write.
+			 */
+		} else {
+			if ((ret = __db_appname(env,
+			    appname == DB_APP_DATA ? DB_APP_RECOVER :
+			    appname, name->data, NULL, &path)) != 0)
+				goto end;
+
+			if (__os_open(env, path, 0, 0, DB_MODE_600, &fhp) != 0)
+				goto end;
+
+			if (flag & DB_FOP_APPEND) {
+				/*
+				 * Appended to the end of the file, undo by
+				 * truncating the file.
+				 */
+				(void)__os_truncate(env, fhp, 0, 0, offset);
+			} else {
+				/*
+				 * Data overwritten in the middle of the file,
+				 * undo by writing back in the old data.
+				 */
+
+				/* Seek to offset. */
+				if ((__os_seek(env, fhp, 0, 0, offset)) != 0)
+					goto end;
+
+				/* Now do the write. */
+				ret = __os_write(env, fhp,
+				    old_data->data, old_data->size, &nbytes);
+			}
+		}
+	} else if (DB_REDO(op)) {
+		/*
+		 * Not all operations log enough data to be redone.  Since
+		 * files are flushed before the transaction commit this is
+		 * not an issue, unless we are on an HA client or initializing
+		 * from a backup.
+		 */
+		if (flag & DB_FOP_REDO) {
+			ret = __fop_write_file(env, txn, name->data,
+			    dirname->size == 0 ? NULL : dirname->data,
+			    appname == DB_APP_DATA ? DB_APP_RECOVER : appname,
+			    NULL, offset, new_data->data, new_data->size, 0);
+#ifdef	HAVE_REPLICATION
+			/*
+			 * Blob files of databases that are not replicated are
+			 * also not replicated.  So assume any ENOENT errors
+			 * are because the file was not replicated.
+			 */
+			if (ret == ENOENT && IS_VIEW_SITE(env))
+				ret = 0;
+#endif
+		} else {
+			/* DB_ASSERT(env, !IS_REP_CLIENT(env)); */
+		}
+	}
+
+end:	if (path != NULL)
+		__os_free(env, path);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	return (ret);
+}
 
+/*
+ * __fop_write_file_recover --
+ *	Recovery function for writing to a blob file.  Files are flushed before
+ *	the transaction is committed, so often the file operations do not need
+ *	to be redone or undone.  However, since no lsn is stored in the file,
+ *	we always try to redo or undo the operation, since it will not change
+ *	the final state of the file if the operation is not needed.  This also
+ *	means that this function has to be very tolerant of errors, such as
+ *	trying to open a file that was deleted, or truncate a file that is
+ *	already short.
+ *
+ * PUBLIC: int __fop_write_file_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__fop_write_file_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__fop_write_file_args *argp;
+	int ret;
+	COMPQUIET(info, NULL);
+
+#ifndef HAVE_64BIT_TYPES
+	COMPQUIET(dbtp, NULL);
+	COMPQUIET(lsnp, NULL);
+	COMPQUIET(op, 0);
+	__db_errx(env, DB_STR("0244",
+	    "Blobs require 64 integer compiler support."));
+	return (DB_OPNOTSUP);
+#endif
+
+	REC_PRINT(__fop_write_file_print);
+	REC_NOOP_INTRO(__fop_write_file_read);
+
+	ret = __fop_write_file_recover_int(env, op,
+	    (APPNAME)argp->appname, argp->flag, &argp->dirname, &argp->name,
+	    &argp->new_data, &argp->old_data, (off_t)argp->offset, argp->txnp);
 	if (ret == 0)
 		*lsnp = argp->prev_lsn;
 	REC_NOOP_CLOSE;
 }
 
 /*
+ * __fop_write_file_60_recover --
+ *
+ * PUBLIC: int __fop_write_file_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__fop_write_file_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__fop_write_file_60_args *argp;
+	off_t offset;
+	int ret;
+	COMPQUIET(info, NULL);
+
+	REC_PRINT(__fop_write_file_60_print);
+	REC_NOOP_INTRO(__fop_write_file_60_read);
+
+	/* The offset is stored as two u_int32_t values. */
+	GET_LO_HI(env, argp->offset_lo, argp->offset_hi, offset, ret);
+	if (ret != 0)
+		goto end;
+
+	ret = __fop_write_file_recover_int(env, op,
+	    (APPNAME)argp->appname, argp->flag, &argp->dirname, &argp->name,
+	    &argp->new_data, &argp->old_data, offset, argp->txnp);
+
+end:	if (ret == 0)
+		*lsnp = argp->prev_lsn;
+	REC_NOOP_CLOSE;
+}
+
+/*
  * __fop_rename_recover --
  *	Recovery functions for rename.  There are two variants that
  * both use the same utility function.  Had we known about this on day
@@ -408,7 +831,148 @@ __fop_rename_recover_int(env, dbtp, lsnp, op, info, undo)
 		if (__fop_read_meta(env,
 		    src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0)
 			goto done;
-		if (__db_chk_meta(env, NULL, meta, 1) != 0)
+		if (__db_chk_meta(env, NULL, meta, DB_CHK_META) != 0)
+			goto done;
+		if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0)
+			goto done;
+		(void)__os_closehandle(env, fhp);
+		fhp = NULL;
+		if (DB_REDO(op)) {
+			/*
+			 * Check to see if the target file exists.  If it
+			 * does and it does not have the proper id then
+			 * it is a later version.  We just remove the source
+			 * file since the state of the world is beyond this
+			 * point.
+			 */
+			if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 &&
+			    __fop_read_meta(env, src, mbuf,
+			    DBMETASIZE, fhp, 1, NULL) == 0 &&
+			    __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0 &&
+			    memcmp(argp->fileid.data,
+			    meta->uid, DB_FILE_ID_LEN) != 0) {
+				(void)__memp_nameop(env,
+				    fileid, NULL, real_old, NULL, 0);
+				goto done;
+			}
+		}
+	}
+
+	if (undo && DB_UNDO(op))
+		(void)__memp_nameop(env, fileid,
+		    (const char *)argp->oldname.data, real_new, real_old, 0);
+	if (DB_REDO(op))
+		(void)__memp_nameop(env, fileid,
+		    (const char *)argp->newname.data, real_old, real_new, 0);
+
+done:	*lsnp = argp->prev_lsn;
+out:	if (real_new != NULL)
+		__os_free(env, real_new);
+	if (real_old != NULL)
+		__os_free(env, real_old);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+
+	REC_NOOP_CLOSE;
+}
+
+/*
+ * __fop_rename_60_recover --
+ *
+ * PUBLIC: int __fop_rename_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ *
+ * PUBLIC: int __fop_rename_noundo_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+
+int
+__fop_rename_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	return (__fop_rename_60_recover_int(env, dbtp, lsnp, op, info, 1));
+}
+
+int
+__fop_rename_noundo_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	return (__fop_rename_60_recover_int(env, dbtp, lsnp, op, info, 0));
+}
+
+static int
+__fop_rename_60_recover_int(env, dbtp, lsnp, op, info, undo)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+	int undo;
+{
+	__fop_rename_60_args *argp;
+	APPNAME appname;
+	DB_FH *fhp;
+	DBMETA *meta;
+	u_int8_t *fileid, mbuf[DBMETASIZE];
+	int ret;
+	char *real_new, *real_old, *src;
+	const char *dirname;
+
+	COMPQUIET(info, NULL);
+
+	fhp = NULL;
+	meta = (DBMETA *)&mbuf[0];
+	ret = 0;
+	real_new = real_old = NULL;
+
+	REC_PRINT(__fop_rename_60_print);
+	REC_NOOP_INTRO(__fop_rename_60_read);
+	fileid = argp->fileid.data;
+
+	if (argp->dirname.size == 0)
+		dirname = NULL;
+	else
+		dirname = (const char *)argp->dirname.data;
+
+
+	appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
+	if (appname == DB_APP_DATA)
+		appname = DB_APP_RECOVER;
+
+	if ((ret = __db_appname(env, appname, (const char *)argp->newname.data,
+	    &dirname, &real_new)) != 0)
+		goto out;
+	if ((ret = __db_appname(env, appname, (const char *)argp->oldname.data,
+	    &dirname, &real_old)) != 0)
+		goto out;
+
+	/*
+	 * Verify that we are manipulating the correct file.  We should always
+	 * be OK on an ABORT or an APPLY, but during recovery, we have to
+	 * check.
+	 */
+	if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) {
+		src = DB_UNDO(op) ? real_new : real_old;
+		/*
+		 * Interpret any error as meaning that the file either doesn't
+		 * exist, doesn't have a meta-data page, or is in some other
+		 * way, shape or form, incorrect, so that we should not restore
+		 * it.
+		 */
+		if (__os_open(env, src, 0, 0, 0, &fhp) != 0)
+			goto done;
+		if (__fop_read_meta(env,
+		    src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0)
+			goto done;
+		if (__db_chk_meta(env, NULL, meta, DB_CHK_META) != 0)
 			goto done;
 		if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0)
 			goto done;
@@ -425,7 +989,7 @@ __fop_rename_recover_int(env, dbtp, lsnp, op, info, undo)
 			if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 &&
 			    __fop_read_meta(env, src, mbuf,
 			    DBMETASIZE, fhp, 1, NULL) == 0 &&
-			    __db_chk_meta(env, NULL, meta, 1) == 0 &&
+			    __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0 &&
 			    memcmp(argp->fileid.data,
 			    meta->uid, DB_FILE_ID_LEN) != 0) {
 				(void)__memp_nameop(env,
@@ -501,6 +1065,7 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo)
 	DB_FH *fhp;
 	DBMETA *meta;
 	u_int8_t *fileid, mbuf[DBMETASIZE];
+	APPNAME appname;
 	int ret;
 	char *real_new, *real_old, *src;
 
@@ -515,10 +1080,11 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo)
 	REC_NOOP_INTRO(__fop_rename_read);
 	fileid = argp->fileid.data;
 
-	if ((ret = __db_appname(env, (APPNAME)argp->appname,
+	appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
+	if ((ret = __db_appname(env, appname,
 	    (const char *)argp->newname.data, NULL, &real_new)) != 0)
 		goto out;
-	if ((ret = __db_appname(env, (APPNAME)argp->appname,
+	if ((ret = __db_appname(env, appname,
 	    (const char *)argp->oldname.data, NULL, &real_old)) != 0)
 		goto out;
 
@@ -540,7 +1106,7 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo)
 		if (__fop_read_meta(env,
 		    src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0)
 			goto done;
-		if (__db_chk_meta(env, NULL, meta, 1) != 0)
+		if (__db_chk_meta(env, NULL, meta, DB_CHK_META) != 0)
 			goto done;
 		if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0)
 			goto done;
@@ -557,7 +1123,7 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo)
 			if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 &&
 			    __fop_read_meta(env, src, mbuf,
 			    DBMETASIZE, fhp, 1, NULL) == 0 &&
-			    __db_chk_meta(env, NULL, meta, 1) == 0 &&
+			    __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0 &&
 			    memcmp(argp->fileid.data,
 			    meta->uid, DB_FILE_ID_LEN) != 0) {
 				(void)__memp_nameop(env,
@@ -652,7 +1218,115 @@ __fop_file_remove_recover(env, dbtp, lsnp, op, info)
 		 * We can ignore errors here since we'll simply fail the
 		 * checks below and assume this is the wrong file.
 		 */
-		(void)__db_chk_meta(env, NULL, meta, 1);
+		(void)__db_chk_meta(env, NULL, meta, DB_CHK_META);
+		is_real =
+		    memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0;
+		is_tmp =
+		    memcmp(argp->tmp_fid.data, meta->uid, DB_FILE_ID_LEN) == 0;
+
+		if (!is_real && !is_tmp)
+			/* File exists, but isn't what we were removing. */
+			cstat = TXN_IGNORE;
+		else
+			/* File exists and is the one that we were removing. */
+			cstat = TXN_COMMIT;
+	}
+	if (fhp != NULL) {
+		(void)__os_closehandle(env, fhp);
+		fhp = NULL;
+	}
+
+	if (DB_UNDO(op)) {
+		/* On the backward pass, we leave a note for the child txn. */
+		if ((ret = __db_txnlist_update(env,
+		    info, argp->child, cstat, NULL, &ret_stat, 1)) != 0)
+			goto out;
+	} else if (DB_REDO(op)) {
+		/*
+		 * On the forward pass, check if someone recreated the
+		 * file while we weren't looking.
+		 */
+		if (cstat == TXN_COMMIT)
+			(void)__memp_nameop(env,
+			    is_real ? argp->real_fid.data : argp->tmp_fid.data,
+			    NULL, real_name, NULL, 0);
+	}
+
+done:	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+out:	if (real_name != NULL)
+		__os_free(env, real_name);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	REC_NOOP_CLOSE;
+}
+
+/*
+ * __fop_file_remove_60_recover --
+ *
+ * PUBLIC: int __fop_file_remove_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__fop_file_remove_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__fop_file_remove_60_args *argp;
+	DBMETA *meta;
+	DB_FH *fhp;
+	size_t len;
+	u_int8_t mbuf[DBMETASIZE];
+	u_int32_t cstat, ret_stat;
+	APPNAME appname;
+	int is_real, is_tmp, ret;
+	char *real_name;
+
+	fhp = NULL;
+	meta = (DBMETA *)&mbuf[0];
+	is_real = is_tmp = 0;
+	real_name = NULL;
+	REC_PRINT(__fop_file_remove_60_print);
+	REC_NOOP_INTRO(__fop_file_remove_60_read);
+
+	/*
+	 * This record is only interesting on the backward, forward, and
+	 * apply phases.
+	 */
+	if (op != DB_TXN_BACKWARD_ROLL &&
+	    op != DB_TXN_FORWARD_ROLL && op != DB_TXN_APPLY)
+		goto done;
+
+	appname = __fop_convert_appname(env, (APPNAME53)argp->appname);
+	if ((ret = __db_appname(env, appname,
+	    argp->name.data, NULL, &real_name)) != 0)
+		goto out;
+
+	/* Verify that we are manipulating the correct file.  */
+	len = 0;
+	if (__os_open(env, real_name, 0, 0, 0, &fhp) != 0 ||
+	    (ret = __fop_read_meta(env, real_name,
+	    mbuf, DBMETASIZE, fhp, 1, &len)) != 0) {
+		/*
+		 * If len is non-zero, then the file exists and has something
+		 * in it, but that something isn't a full meta-data page, so
+		 * this is very bad.  Bail out!
+		 */
+		if (len != 0)
+			goto out;
+
+		/* File does not exist. */
+		cstat = TXN_EXPECTED;
+	} else {
+		/*
+		 * We can ignore errors here since we'll simply fail the
+		 * checks below and assume this is the wrong file.
+		 */
+		(void)__db_chk_meta(env, NULL, meta, DB_CHK_META);
 		is_real =
 		    memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0;
 		is_tmp =
@@ -695,3 +1369,4 @@ out:	if (real_name != NULL)
 		(void)__os_closehandle(env, fhp);
 	REC_NOOP_CLOSE;
 }
+
diff --git a/src/fileops/fop_util.c b/src/fileops/fop_util.c
index 1925ffd1..d51aba0f 100644
--- a/src/fileops/fop_util.c
+++ b/src/fileops/fop_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -24,9 +24,10 @@ static int __fop_inmem_read_meta __P((DB *, DB_TXN *, const char *, u_int32_t,
 	    u_int32_t));
 static int __fop_inmem_swap __P((DB *, DB *, DB_TXN *,
 	       const char *, const char *, const char *, DB_LOCKER *));
-static int __fop_ondisk_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *));
+static int __fop_ondisk_dummy __P((
+		DB *, DB_TXN *, const char *, u_int8_t *, APPNAME));
 static int __fop_ondisk_swap __P((DB *, DB *, DB_TXN *,
-	     const char *, const char *, const char *, DB_LOCKER *));
+	     const char *, const char *, const char *, DB_LOCKER *, APPNAME));
 
 /*
  * Acquire the environment meta-data lock.  The parameters are the
@@ -115,7 +116,7 @@ __fop_lock_handle(env, dbp, locker, mode, elockp, flags)
 	/*
 	 * If we are in recovery, the only locking we should be
 	 * doing is on the global environment.  The one exception
-	 * is if we are opening an exclusive database on a client 
+	 * is if we are opening an exclusive database on a client
 	 * syncing with the master.
 	 */
 	if (IS_RECOVERING(env) && !F2_ISSET(dbp, DB2_AM_INTEXCL))
@@ -234,8 +235,8 @@ __fop_file_setup(dbp, ip, txn, name, mode, flags, retidp)
 	real_name = real_tmpname = tmpname = NULL;
 	dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
 	aflags = LF_ISSET(DB_INTERNAL_PERSISTENT_DB) ? DB_APP_META :
-	    (LF_ISSET(DB_INTERNAL_TEMPORARY_DB) ? DB_APP_NONE : DB_APP_DATA);
-	LF_CLR(DB_INTERNAL_PERSISTENT_DB | DB_INTERNAL_TEMPORARY_DB);
+	    (LF_ISSET(DB_INTERNAL_BLOB_DB) ? DB_APP_BLOB :
+	    (LF_ISSET(DB_INTERNAL_TEMPORARY_DB) ? DB_APP_NONE : DB_APP_DATA));
 
 	ret = 0;
 	retries = 0;
@@ -394,14 +395,14 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 				goto done;
 			}
 
-			/* 
+			/*
 			 * Case 4: This is a valid file.  Now check the
-			 * checksum and decrypt the file so the file 
+			 * checksum and decrypt the file so the file
 			 * id can be obtained for the handle lock.  Note that
 			 * the checksum can fail if the database is being
 			 * written (possible because the handle lock has
 			 * not been obtained yet).  So on checksum fail retry
-			 * until the checksum succeeds or the number of 
+			 * until the checksum succeeds or the number of
 			 * retries is exhausted, then throw an error.
 			 */
 			if (ret == 0 && (ret = __db_chk_meta(env, dbp,
@@ -410,7 +411,7 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 					ret = t_ret;
 					goto err;
 				}
-				/* 
+				/*
 				 * Retry unless the number of retries is
 				 * exhausted.
 				 */
@@ -423,8 +424,7 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 						ret = EINVAL;
 					goto err;
 				}
-				if ((ret = __os_closehandle(env, fhp)) != 0)
-					goto err;
+				CLOSE_HANDLE(dbp, fhp);
 				goto retry;
 			}
 			/* Get the file id for the handle lock. */
@@ -464,11 +464,8 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 			 * any application level FCNTL semantics.
 			 */
 			DB_ASSERT(env, !LF_ISSET(DB_FCNTL_LOCKING));
-			if (!F_ISSET(dbp, DB_AM_INMEM)) {
-				if ((ret = __os_closehandle(env, fhp)) != 0)
-					goto err;
-				fhp = NULL;
-			}
+			if (!F_ISSET(dbp, DB_AM_INMEM))
+				CLOSE_HANDLE(dbp, fhp);
 			if ((ret = __fop_lock_handle(env,
 			    dbp, locker, lockmode, &elock, 0)) != 0) {
 				if (F_ISSET(dbp, DB_AM_INMEM))
@@ -495,7 +492,7 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 
 		}
 
-		/* 
+		/*
 		 * If we got here, then we have the handle lock, it is now
 		 * safe to check the rest of the meta data, since the file
 		 * will not be deleted out from under the handle.
@@ -505,7 +502,7 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 			    dbp, txn, name, flags, DB_SKIP_CHK)) != 0)
 				goto err;
 		} else {
-			if ((ret = __db_meta_setup(env, dbp, real_name, 
+			if ((ret = __db_meta_setup(env, dbp, real_name,
 			    (DBMETA *)mbuf, flags, DB_SKIP_CHK)) != 0)
 				goto err;
 		}
@@ -524,9 +521,8 @@ reopen:		if (!F_ISSET(dbp, DB_AM_INMEM) && (ret =
 			if (create_ok) {
 				if (F_ISSET(dbp, DB_AM_INMEM)) {
 					RESET_MPF(dbp, DB_MPOOL_DISCARD);
-				} else if ((ret =
-				    __os_closehandle(env, fhp)) != 0)
-					goto err;
+				} else
+					CLOSE_HANDLE(dbp, fhp);
 				LF_SET(DB_CREATE);
 				goto create;
 			} else {
@@ -856,6 +852,7 @@ retry:	if ((ret = __db_master_open(dbp,
 	/* Copy the pagesize and set the sub-database flag. */
 	dbp->pgsize = mdbp->pgsize;
 	F_SET(dbp, DB_AM_SUBDB);
+	dbp->blob_file_id = mdbp->blob_file_id;
 
 	if (name != NULL && (ret = __db_master_update(mdbp, dbp,
 	    ip, txn, name, dbp->type, MU_OPEN, NULL, flags)) != 0) {
@@ -881,6 +878,8 @@ retry:	if ((ret = __db_master_open(dbp,
 
 	DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, mname);
 
+	dbp->dirname = mdbp->dirname;
+
 	/*
 	 * We copy our fileid from our master so that we all open
 	 * the same file in mpool.  We'll use the meta-pgno to lock
@@ -1174,13 +1173,14 @@ err:
  * remove).
  *
  * PUBLIC: int __fop_dummy __P((DB *,
- * PUBLIC:     DB_TXN *, const char *, const char *));
+ * PUBLIC:     DB_TXN *, const char *, const char *, APPNAME));
  */
 int
-__fop_dummy(dbp, txn, old, new)
+__fop_dummy(dbp, txn, old, new, appname)
 	DB *dbp;
 	DB_TXN *txn;
 	const char *old, *new;
+	APPNAME appname;
 {
 	DB *tmpdbp;
 	DB_TXN *stxn;
@@ -1214,17 +1214,19 @@ __fop_dummy(dbp, txn, old, new)
 	if (F_ISSET(dbp, DB_AM_NOT_DURABLE) &&
 		(ret = __db_set_flags(tmpdbp, DB_TXN_NOT_DURABLE)) != 0)
 		goto err;
+	tmpdbp->dirname = dbp->dirname;
 	memset(mbuf, 0, sizeof(mbuf));
 	ret = F_ISSET(dbp, DB_AM_INMEM) ?
 	    __fop_inmem_dummy(tmpdbp, stxn, back, mbuf) :
-	    __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf);
+	    __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf, appname);
 
 	if (ret != 0)
 		goto err;
 
 	ret = F_ISSET(dbp, DB_AM_INMEM) ?
 	    __fop_inmem_swap(dbp, tmpdbp, stxn, old, new, back, txn->locker) :
-	    __fop_ondisk_swap(dbp, tmpdbp, stxn, old, new, back, txn->locker);
+	    __fop_ondisk_swap(
+		dbp, tmpdbp, stxn, old, new, back, txn->locker, appname);
 	stxn = NULL;
 	if (ret != 0)
 		goto err;
@@ -1246,12 +1248,13 @@ err:	if (stxn != NULL)
  * and the subsequent calls in __db_rename do the work for the
  * transactional case).
  *
- * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *));
+ * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *, APPNAME));
  */
 int
-__fop_dbrename(dbp, old, new)
+__fop_dbrename(dbp, old, new, appname)
 	DB *dbp;
 	const char *old, *new;
+	APPNAME appname;
 {
 	DB_LOCK elock;
 	ENV *env;
@@ -1269,11 +1272,11 @@ __fop_dbrename(dbp, old, new)
 	} else {
 		/* Get full names. */
 		if ((ret = __db_appname(env,
-		    DB_APP_DATA, old, &dbp->dirname, &real_old)) != 0)
+		    appname, old, &dbp->dirname, &real_old)) != 0)
 			goto err;
 
 		if ((ret = __db_appname(env,
-		    DB_APP_DATA, new, &dbp->dirname, &real_new)) != 0)
+		    appname, new, &dbp->dirname, &real_new)) != 0)
 			goto err;
 	}
 
@@ -1414,9 +1417,11 @@ __fop_inmem_read_meta(dbp, txn, name, flags, chkflags)
 		if ((ret = __db_chk_meta(dbp->env, dbp, metap, chkflags)) == 0)
 			memcpy(dbp->fileid,
 			    ((DBMETA *)metap)->uid, DB_FILE_ID_LEN);
-	} else 
+	} else
 		ret = __db_meta_setup(
 		    dbp->env, dbp, name, metap, flags, chkflags);
+	if (ret == DB_CHKSUM_FAIL)
+		ret = DB_META_CHKSUM_FAIL;
 
 	if ((t_ret =
 	    __memp_fput(dbp->mpf, ip, metap, dbp->priority)) && ret == 0)
@@ -1426,11 +1431,12 @@ __fop_inmem_read_meta(dbp, txn, name, flags, chkflags)
 }
 
 static int
-__fop_ondisk_dummy(dbp, txn, name, mbuf)
+__fop_ondisk_dummy(dbp, txn, name, mbuf, appname)
 	DB *dbp;
 	DB_TXN *txn;
 	const char *name;
 	u_int8_t *mbuf;
+	APPNAME appname;
 {
 	ENV *env;
 	int ret;
@@ -1442,11 +1448,11 @@ __fop_ondisk_dummy(dbp, txn, name, mbuf)
 	dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
 
 	if ((ret = __db_appname(env,
-	    DB_APP_DATA, name, &dbp->dirname, &realname)) != 0)
+	    appname, name, &dbp->dirname, &realname)) != 0)
 		goto err;
 
 	if ((ret = __fop_create(env,
-	    txn, NULL, name, &dbp->dirname, DB_APP_DATA, 0, dflags)) != 0)
+	    txn, NULL, name, &dbp->dirname, appname, 0, dflags)) != 0)
 		goto err;
 
 	if ((ret =
@@ -1455,7 +1461,7 @@ __fop_ondisk_dummy(dbp, txn, name, mbuf)
 
 	((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC;
 	if ((ret = __fop_write(env, txn, name, dbp->dirname,
-	    DB_APP_DATA, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0)
+	    appname, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0)
 		goto err;
 
 	memcpy(dbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN);
@@ -1511,11 +1517,12 @@ err:	return (ret);
 }
 
 static int
-__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker)
+__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker, appname)
 	DB *dbp, *tmpdbp;
 	DB_TXN *txn;
 	const char *old, *new, *back;
 	DB_LOCKER *locker;
+	APPNAME appname;
 {
 	DBT fiddbt, namedbt, tmpdbt;
 	DB_FH *fhp;
@@ -1538,7 +1545,7 @@ __fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker)
 	dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
 
 	if ((ret = __db_appname(env,
-	    DB_APP_DATA, new, &dbp->dirname, &realnew)) != 0)
+	    appname, new, &dbp->dirname, &realnew)) != 0)
 		goto err;
 
 	/* Now, lock the name space while we initialize this file. */
@@ -1634,10 +1641,10 @@ retry:	GET_ENVLOCK(env, locker, &elock);
 	 * swap for the handle lock.
 	 */
 	if ((ret = __fop_rename(env, txn,
-	    old, new, &dbp->dirname, dbp->fileid, DB_APP_DATA, 1, dflags)) != 0)
+	    old, new, &dbp->dirname, dbp->fileid, appname, 1, dflags)) != 0)
 		goto err;
 	if ((ret = __fop_rename(env, txn, back, old,
-	    &dbp->dirname, tmpdbp->fileid, DB_APP_DATA, 0, dflags)) != 0)
+	    &dbp->dirname, tmpdbp->fileid, appname, 0, dflags)) != 0)
 		goto err;
 	if ((ret = __fop_lock_handle(env,
 	    tmpdbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0)
@@ -1673,12 +1680,12 @@ retry:	GET_ENVLOCK(env, locker, &elock);
 	DB_INIT_DBT(namedbt, old, strlen(old) + 1);
 	if ((t_ret = __fop_file_remove_log(env,
 	    parent, &lsn, dflags, &fiddbt, &tmpdbt, &namedbt,
-	    (u_int32_t)DB_APP_DATA, child_txnid)) != 0 && ret == 0)
+	    (u_int32_t)appname, child_txnid)) != 0 && ret == 0)
 		ret = t_ret;
 
 	/* This is a delayed delete of the dummy file. */
 	if ((ret = __db_appname(env,
-	    DB_APP_DATA, old, &dbp->dirname, &realold)) != 0)
+	    appname, old, &dbp->dirname, &realold)) != 0)
 		goto err;
 
 	if ((ret = __txn_remevent(env, parent, realold, NULL, 0)) != 0)
diff --git a/src/hash/hash.c b/src/hash/hash.c
index ae5736e7..5bff1dee 100644
--- a/src/hash/hash.c
+++ b/src/hash/hash.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
@@ -298,6 +298,7 @@ __hamc_count(dbc, recnop)
 	}
 
 	switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) {
+	case H_BLOB:
 	case H_KEYDATA:
 	case H_OFFPAGE:
 		recno = 1;
@@ -379,7 +380,7 @@ __hamc_del(dbc, flags)
 	hcp = (HASH_CURSOR *)dbc->internal;
 
 	if (F_ISSET(hcp, H_DELETED))
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	if ((ret = __ham_get_meta(dbc)) != 0)
 		goto out;
@@ -535,7 +536,7 @@ next:			ret = __ham_item_next(dbc, lock_type, pgnop);
 	case DB_CURRENT:
 		/* cgetchk has already determined that the cursor is set. */
 		if (F_ISSET(hcp, H_DELETED)) {
-			ret = DB_KEYEMPTY;
+			ret = DBC_ERR(dbc, DB_KEYEMPTY);
 			goto err;
 		}
 
@@ -554,7 +555,8 @@ next:			ret = __ham_item_next(dbc, lock_type, pgnop);
 		if (ret != 0 && ret != DB_NOTFOUND)
 			goto err;
 		else if (F_ISSET(hcp, H_OK)) {
-			if (*pgnop == PGNO_INVALID)
+			if (*pgnop == PGNO_INVALID && HPAGE_PTYPE(
+			    H_PAIRDATA(dbp, hcp->page, hcp->indx)) != H_BLOB)
 				ret = __ham_dup_return(dbc, data, flags);
 			break;
 		} else if (!F_ISSET(hcp, H_NOMORE)) {
@@ -576,7 +578,7 @@ next:			ret = __ham_item_next(dbc, lock_type, pgnop);
 			    dbc->thread_info, hcp->page, dbc->priority);
 			hcp->page = NULL;
 			if (hcp->bucket == 0) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				hcp->pgno = PGNO_INVALID;
 				goto err;
 			}
@@ -598,7 +600,7 @@ next:			ret = __ham_item_next(dbc, lock_type, pgnop);
 			F_CLR(hcp, H_ISDUP);
 			hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
 			if (hcp->bucket > hcp->hdr->max_bucket) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				hcp->pgno = PGNO_INVALID;
 				goto err;
 			}
@@ -612,7 +614,7 @@ next:			ret = __ham_item_next(dbc, lock_type, pgnop);
 		case DB_SET:
 		case DB_SET_RANGE:
 			/* Key not found. */
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		case DB_CURRENT:
 			/*
@@ -621,7 +623,7 @@ next:			ret = __ham_item_next(dbc, lock_type, pgnop);
 			 * locking.  We return the same error code as we would
 			 * if the cursor were deleted.
 			 */
-			ret = DB_KEYEMPTY;
+			ret = DBC_ERR(dbc, DB_KEYEMPTY);
 			goto err;
 		default:
 			DB_ASSERT(env, 0);
@@ -649,11 +651,14 @@ __ham_bulk(dbc, data, flags)
 	DB *dbp;
 	DB_MPOOLFILE *mpf;
 	HASH_CURSOR *cp;
+	HBLOB hblob;
 	PAGE *pg;
 	db_indx_t dup_len, dup_off, dup_tlen, indx, *inp;
 	db_lockmode_t lock_mode;
 	db_pgno_t pgno;
+	off_t blob_size;
 	int32_t *endp, *offp, *saveoff;
+	db_seq_t blob_id;
 	u_int32_t key_off, key_size, pagesize, size, space;
 	u_int8_t *dbuf, *dp, *hk, *np, *tmp;
 	int is_dup, is_key;
@@ -708,6 +713,10 @@ next_pg:
 				space -= key_size;
 				key_off = (u_int32_t)(np - dbuf);
 				np += key_size;
+			} else if (HPAGE_PTYPE(hk) == H_BLOB) {
+				__db_errx(dbp->env, DB_STR("1185",
+				    "Blob item key."));
+				(void)__env_panic(dbp->env, DB_RUNRECOVERY);
 			} else {
 				if (need_pg) {
 					dp = np;
@@ -982,6 +991,38 @@ get_space:
 			np += size;
 			space -= size;
 			break;
+		case H_BLOB:
+			space -= (is_key ? 4 : 2) * sizeof(*offp);
+			if (space > data->ulen)
+				goto back_up;
+
+			memcpy(&hblob, hk, HBLOB_SIZE);
+			blob_id = (db_seq_t)hblob.id;
+			GET_BLOB_SIZE(dbc->env, hblob, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			if (blob_size > UINT32_MAX) {
+				size = UINT32_MAX;
+				goto back_up;
+			}
+			size = (u_int32_t)blob_size;
+			if (size > space)
+				goto back_up;
+
+			if ((ret = __blob_bulk(dbc, size, blob_id, np)) != 0)
+				return (ret);
+
+			if (is_key) {
+				*offp-- = (int32_t)key_off;
+				*offp-- = (int32_t)key_size;
+			}
+
+			*offp-- = (int32_t)(np - dbuf);
+			*offp-- = (int32_t)size;
+
+			np += size;
+			space -= size;
+			break;
 		default:
 			/* Do nothing. */
 			break;
@@ -1014,7 +1055,7 @@ get_space:
 			 * DBC->get(DB_NEXT) will return DB_NOTFOUND.
 			 */
 			cp->bucket--;
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 		} else {
 			/*
 			 * Start on the next bucket.
@@ -1071,7 +1112,7 @@ __hamc_put(dbc, key, data, flags, pgnop)
 
 	if (F_ISSET(hcp, H_DELETED) && flags != DB_KEYFIRST &&
 	    flags != DB_KEYLAST && flags != DB_OVERWRITE_DUP)
-		return (DB_NOTFOUND);
+		return (DBC_ERR(dbc, DB_NOTFOUND));
 
 	if ((ret = __ham_get_meta(dbc)) != 0)
 		goto err1;
@@ -1083,9 +1124,15 @@ __hamc_put(dbc, key, data, flags, pgnop)
 	case DB_NOOVERWRITE:
 	case DB_OVERWRITE_DUP:
 		nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
-		    HKEYDATA_PSIZE(key->size)) +
-		    (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
-		    HKEYDATA_PSIZE(data->size));
+		    HKEYDATA_PSIZE(key->size));
+		if (dbp->blob_threshold && (data->size >=
+		    dbp->blob_threshold || F_ISSET(data, DB_DBT_BLOB)))
+			nbytes += HBLOB_PSIZE;
+		else if (ISBIG(hcp, data->size))
+			nbytes += HOFFPAGE_PSIZE;
+		else
+			nbytes += HKEYDATA_PSIZE(data->size);
+
 		if ((ret = __ham_lookup(dbc,
 		    key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) {
 			if (hcp->seek_found_page != PGNO_INVALID &&
@@ -1124,7 +1171,7 @@ __hamc_put(dbc, key, data, flags, pgnop)
 		} else if (ret == 0 && flags == DB_NOOVERWRITE &&
 		    !F_ISSET(hcp, H_DELETED)) {
 			if (*pgnop == PGNO_INVALID)
-				ret = DB_KEYEXIST;
+				ret = DBC_ERR(dbc, DB_KEYEXIST);
 			else
 				ret = __bam_opd_exists(dbc, *pgnop);
 			if (ret != 0)
@@ -1468,6 +1515,7 @@ __ham_dup_return(dbc, val, flags)
 	type = HPAGE_TYPE(dbp, hcp->page, ndx);
 	pp = hcp->page;
 	myval = val;
+	cmp = 0;
 
 	/*
 	 * There are 4 cases:
@@ -1545,9 +1593,13 @@ __ham_dup_return(dbc, val, flags)
 				memcpy(&pgno,
 				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
 				if ((ret = __db_moff(dbc, val, pgno, tlen,
-				    dbp->dup_compare, &cmp)) != 0)
+				    dbp->dup_compare, &cmp, NULL)) != 0)
 					return (ret);
 				cmp = -cmp;
+			} else if (((HKEYDATA *)hk)->type == H_BLOB) {
+				__db_errx(dbp->env, DB_STR("1186",
+		    "Error - found a blob file in a duplicate data set."));
+				(void)__env_panic(dbp->env, DB_RUNRECOVERY);
 			} else {
 				/*
 				 * We do not zero tmp_val since the comparison
@@ -1557,8 +1609,8 @@ __ham_dup_return(dbc, val, flags)
 				tmp_val.size = LEN_HDATA(dbp, hcp->page,
 				    dbp->pgsize, hcp->indx);
 				cmp = dbp->dup_compare == NULL ?
-				    __bam_defcmp(dbp, &tmp_val, val) :
-				    dbp->dup_compare(dbp, &tmp_val, val);
+				    __bam_defcmp(dbp, &tmp_val, val, NULL) :
+				    dbp->dup_compare(dbp, &tmp_val, val, NULL);
 			}
 
 			if (cmp > 0 && flags == DB_GET_BOTH_RANGE &&
@@ -1567,7 +1619,7 @@ __ham_dup_return(dbc, val, flags)
 		}
 
 		if (cmp != 0)
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 	}
 
 	/*
@@ -1654,17 +1706,21 @@ __ham_overwrite(dbc, nval, flags)
 	u_int32_t flags;
 {
 	DB *dbp;
-	DBT *myval, tmp_val, tmp_val2;
+	DBT *myval, tmp_val, tmp_val2, old_rec, new_rec;
 	ENV *env;
 	HASH_CURSOR *hcp;
+	HBLOB hblob;
 	void *newrec;
 	u_int8_t *hk, *p;
 	u_int32_t len, nondup_size;
+	db_seq_t blob_id, new_blob_id;
 	db_indx_t newsize;
+	off_t blob_size;
 	int ret;
 
 	dbp = dbc->dbp;
 	env = dbp->env;
+	ret = 0;
 	hcp = (HASH_CURSOR *)dbc->internal;
 	if (F_ISSET(hcp, H_ISDUP)) {
 		/*
@@ -1717,7 +1773,7 @@ __ham_overwrite(dbc, nval, flags)
 				    NULL, nval, flags, NULL));
 			}
 
-			if ((ret = __os_malloc(dbp->env,
+			if ((ret = __os_malloc(env,
 			    DUP_SIZE(newsize), &newrec)) != 0)
 				return (ret);
 			memset(&tmp_val2, 0, sizeof(tmp_val2));
@@ -1765,7 +1821,7 @@ __ham_overwrite(dbc, nval, flags)
 				    (u_int8_t *)newrec + sizeof(db_indx_t);
 				tmp_val2.size = newsize;
 				if (dbp->dup_compare(
-				    dbp, &tmp_val, &tmp_val2) != 0) {
+				    dbp, &tmp_val, &tmp_val2, NULL) != 0) {
 					__os_free(env, newrec);
 					return (__db_duperr(dbp, flags));
 				}
@@ -1816,7 +1872,7 @@ __ham_overwrite(dbc, nval, flags)
 				    sizeof(db_indx_t);
 				tmp_val2.size = hcp->dup_len;
 				if (dbp->dup_compare(
-				    dbp, nval, &tmp_val2) != 0) {
+				    dbp, nval, &tmp_val2, NULL) != 0) {
 					__db_errx(env, DB_STR("1131",
 			    "Existing data sorts differently from put data"));
 					return (EINVAL);
@@ -1848,16 +1904,84 @@ __ham_overwrite(dbc, nval, flags)
 			hcp->dup_len = (db_indx_t)nval->size;
 		}
 		myval = &tmp_val;
+		goto end;
+	}
+	hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
+	if (HPAGE_PTYPE(hk) == H_BLOB) {
+		memcpy(&hblob, hk, HBLOB_SIZE);
+		memset(&old_rec, 0, sizeof(DBT));
+		memset(&new_rec, 0, sizeof(DBT));
+		if (DBC_LOGGING(dbc)) {
+			new_rec.data = HKEYDATA_DATA(&hblob);
+			if ((ret = __os_malloc(
+			    env, HBLOB_SIZE, &old_rec.data)) != 0)
+				return (ret);
+			memcpy(old_rec.data,
+			    HKEYDATA_DATA(&hblob), HBLOB_DSIZE);
+			new_rec.size = old_rec.size = HBLOB_DSIZE;
+		}
+		/*
+		* Inserting a blob record instead of blob data, only
+		* used internally by the DB_STREAM api.
+		*/
+		if (F_ISSET(nval, DB_DBT_BLOB_REC)) {
+			DB_ASSERT(env, nval->size == HBLOB_SIZE);
+			DB_ASSERT(env, HPAGE_PTYPE(nval->data) == H_BLOB);
+			memcpy(&hblob, nval->data, nval->size);
+		} else {
+			/*
+			* A blob file overwrite is simpler than other
+			* replace operations. It's simply a matter
+			* of deleting the old blob file, and creating a
+			* new one. We may need to be careful of
+			* cursors when we have support for blob
+			* cursors.
+			* That means that we can skip the replpair
+			* call.
+			*/
+			blob_id = (db_seq_t)hblob.id;
+			GET_BLOB_SIZE(env, hblob, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			if ((ret = __blob_repl(dbc,
+			    nval, blob_id, &new_blob_id, &blob_size)) == 0) {
+				SET_BLOB_ID(&hblob, new_blob_id, HBLOB);
+				SET_BLOB_SIZE(&hblob, blob_size, HBLOB);
+			}
+		}
+		if (ret == 0) {
+			if (DBC_LOGGING(dbc)) {
+				if ((ret = __ham_replace_log(dbp,
+				    dbc->txn, &LSN(hcp->page), 0,
+				    PGNO(hcp->page),
+				    (u_int32_t)H_DATAINDEX(hcp->indx),
+				    &LSN(hcp->page), 0,
+				    OP_SET(H_BLOB, hcp->page), &old_rec,
+				    OP_SET(H_BLOB, hcp->page),
+				    &new_rec)) != 0) {
+					memcpy(HKEYDATA_DATA(&hblob),
+					    old_rec.data, HBLOB_DSIZE);
+					__os_free(env, old_rec.data);
+					return (ret);
+				}
+
+			} else
+				LSN_NOT_LOGGED(LSN(hcp->page));
+		}
+		/* Copy the updated blob data back to the page. */
+		memcpy(hk, &hblob, HBLOB_SIZE);
+		if (old_rec.data != NULL)
+			__os_free(env, old_rec.data);
+		return (ret);
 	} else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
 		/* Put/overwrite */
 		memcpy(&tmp_val, nval, sizeof(*nval));
 		F_SET(&tmp_val, DB_DBT_PARTIAL);
 		tmp_val.doff = 0;
-		hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
-		if (HPAGE_PTYPE(hk) == H_OFFPAGE)
+		if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
 			memcpy(&tmp_val.dlen,
 			    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
-		else
+		} else
 			tmp_val.dlen = LEN_HDATA(dbp, hcp->page,
 			    hcp->hdr->dbmeta.pagesize, hcp->indx);
 		myval = &tmp_val;
@@ -1865,7 +1989,7 @@ __ham_overwrite(dbc, nval, flags)
 		/* Regular partial put */
 		myval = nval;
 
-	return (__ham_replpair(dbc, myval,
+end:	return (__ham_replpair(dbc, myval,
 	    F_ISSET(hcp, H_ISDUP) ? H_DUPLICATE : H_KEYDATA));
 }
 
@@ -1955,7 +2079,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop)
 			return (ret);
 	}
 	F_SET(hcp, H_NOMORE);
-	return (DB_NOTFOUND);
+	return (DBC_ERR(dbc, DB_NOTFOUND));
 }
 
 /*
diff --git a/src/hash/hash.src b/src/hash/hash.src
index e544c6f3..f56a9c5b 100644
--- a/src/hash/hash.src
+++ b/src/hash/hash.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/hash/hash_compact.c b/src/hash/hash_compact.c
index 83b5ffb1..79fb6004 100644
--- a/src/hash/hash_compact.c
+++ b/src/hash/hash_compact.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  * $Id$
  */
 
@@ -118,7 +118,8 @@ __ham_compact_int(dbc, start, stop, factor, c_data, donep, flags)
 					break;
 				origpgno = pgno;
 				if ((ret = __db_truncate_root(dbc, hcp->page,
-				    H_DATAINDEX(hcp->indx), &pgno, 0)) != 0)
+				    H_DATAINDEX(hcp->indx),
+				    &pgno, 0, &pgs_done)) != 0)
 					break;
 				if (pgno != origpgno) {
 					memcpy(HOFFDUP_PGNO(H_PAIRDATA(dbp,
@@ -247,7 +248,7 @@ __ham_compact_bucket(dbc, c_data, pgs_donep)
 		if (check_trunc && PREV_PGNO(pg) != PGNO_INVALID  &&
 		    PGNO(pg) > c_data->compact_truncate &&
 		    (ret = __db_exchange_page(dbc, &pg,
-		    hcp->page, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+		    hcp->page, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
 			break;
 		if (pgno != PGNO(pg))
 			(*pgs_donep)++;
@@ -400,8 +401,8 @@ __ham_truncate_overflow(dbc, indx, c_data, pgs_done)
 		if ((ret = __memp_dirty(dbp->mpf, &hcp->page,
 		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
 			return (ret);
-		if ((ret =
-		     __db_truncate_root(dbc, hcp->page, indx, &pgno, 0)) != 0)
+		if ((ret = __db_truncate_root(dbc,
+		    hcp->page, indx, &pgno, 0, pgs_done)) != 0)
 			return (ret);
 		if (pgno != origpgno) {
 			memcpy(HOFFPAGE_PGNO(P_ENTRY(dbp, hcp->page, indx)),
@@ -410,7 +411,8 @@ __ham_truncate_overflow(dbc, indx, c_data, pgs_done)
 			c_data->compact_pages--;
 		}
 	}
-	if ((ret = __db_truncate_overflow(dbc, pgno, NULL, c_data)) != 0)
+	if ((ret =
+	    __db_truncate_overflow(dbc, pgno, NULL, c_data, pgs_done)) != 0)
 		return (ret);
 	return (0);
 }
@@ -434,10 +436,11 @@ __ham_compact_hash(dbp, ip, txn, c_data)
 	HMETA *meta;
 	PAGE *oldpage;
 	db_pgno_t free_pgno, last_pgno, pgno, start_pgno;
-	int flags, local_txn, ret, t_ret;
+	int flags, local_txn, pgs_done, ret, t_ret;
 	u_int32_t bucket, i, size;
 
 	local_txn = IS_DB_AUTO_COMMIT(dbp, txn);
+	pgs_done = 0;
 	oldpage = NULL;
 	dbc = NULL;
 	LOCK_INIT(lock);
@@ -506,8 +509,8 @@ __ham_compact_hash(dbp, ip, txn, c_data)
 					flags = 0;
 				else
 					flags = DB_EXCH_FREE;
-				if ((ret = __db_exchange_page(dbc,
-				    &oldpage, NULL, free_pgno, flags)) != 0)
+				if ((ret = __db_exchange_page(dbc, &oldpage,
+				    NULL, free_pgno, flags, &pgs_done)) != 0)
 					goto err;
 			} else if (pgno >= last_pgno) {
 				if ((ret = __db_free(dbc, oldpage, 0)) != 0)
@@ -526,7 +529,8 @@ __ham_compact_hash(dbp, ip, txn, c_data)
 	}
 	if (ret == 0 && F_ISSET(dbp, DB_AM_SUBDB) &&
 	    PGNO(hcp->hdr) > c_data->compact_truncate)
-		ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr, c_data);
+		ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr,
+		    c_data, &pgs_done);
 
 err:	if (oldpage != NULL && (t_ret = __memp_fput(dbp->mpf,
 	    dbc->thread_info, oldpage, dbc->priority)) != 0 && ret == 0)
diff --git a/src/hash/hash_conv.c b/src/hash/hash_conv.c
index fa084f2a..7a53a037 100644
--- a/src/hash/hash_conv.c
+++ b/src/hash/hash_conv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -104,7 +104,12 @@ __ham_mswap(env, pg)
 	SWAP32(p);		/* h_charkey */
 	for (i = 0; i < NCACHED; ++i)
 		SWAP32(p);	/* spares */
-	p += 59 * sizeof(u_int32_t); /* unused */
+	SWAP32(p);		/* threshold */
+	SWAP32(p);		/* file id lo */
+	SWAP32(p);		/* file id hi */
+	SWAP32(p);		/* sdb id lo */
+	SWAP32(p);		/* sdb id hi */
+	p += 54 * sizeof(u_int32_t); /* unused */
 	SWAP32(p);		/* crypto_magic */
 	return (0);
 }
diff --git a/src/hash/hash_dup.c b/src/hash/hash_dup.c
index 879c33d7..523d7227 100644
--- a/src/hash/hash_dup.c
+++ b/src/hash/hash_dup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
@@ -368,6 +368,7 @@ finish:		if (ret == 0) {
 			off += len + 2 * sizeof(db_indx_t);
 		}
 		break;
+	case H_BLOB:
 	default:
 		ret = __db_pgfmt(env, hcp->pgno);
 		break;
@@ -772,7 +773,7 @@ __ham_dsearch(dbc, dbt, offp, cmpp, flags)
 	DBT cur;
 	HASH_CURSOR *hcp;
 	db_indx_t i, len;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 	u_int8_t *data;
 
 	dbp = dbc->dbp;
@@ -794,7 +795,7 @@ __ham_dsearch(dbc, dbt, offp, cmpp, flags)
 		 * we're done.  In the latter case, if permitting partial
 		 * matches, it's not a failure.
 		 */
-		*cmpp = func(dbp, dbt, &cur);
+		*cmpp = func(dbp, dbt, &cur, NULL);
 		if (*cmpp == 0)
 			break;
 		if (*cmpp < 0 && dbp->dup_compare != NULL) {
diff --git a/src/hash/hash_func.c b/src/hash/hash_func.c
index baf6061c..1e83b00a 100644
--- a/src/hash/hash_func.c
+++ b/src/hash/hash_func.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993
diff --git a/src/hash/hash_meta.c b/src/hash/hash_meta.c
index d9a35cb4..aefdffb8 100644
--- a/src/hash/hash_meta.c
+++ b/src/hash/hash_meta.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/hash/hash_method.c b/src/hash/hash_method.c
index 1da81e70..a05bcea6 100644
--- a/src/hash/hash_method.c
+++ b/src/hash/hash_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -20,7 +20,7 @@ static int __ham_set_h_hash
 static int __ham_set_h_nelem __P((DB *, u_int32_t));
 
 static int __ham_get_h_compare
-	__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+	__P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
 
 /*
  * __ham_db_create --
@@ -153,7 +153,7 @@ __ham_set_h_hash(dbp, func)
 static int
 __ham_get_h_compare(dbp, funcp)
 	DB *dbp;
-	int (**funcp) __P((DB *, const DBT *, const DBT *));
+	int (**funcp) __P((DB *, const DBT *, const DBT *, size_t *));
 {
 	HASH *t;
 
@@ -170,13 +170,13 @@ __ham_get_h_compare(dbp, funcp)
  * __ham_set_h_compare --
  *	Set the comparison function.
  *
- * PUBLIC: int __ham_set_h_compare
- * PUBLIC:         __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ * PUBLIC: int __ham_set_h_compare __P((DB *,
+ * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, size_t *)));
  */
 int
 __ham_set_h_compare(dbp, func)
 	DB *dbp;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 {
 	HASH *t;
 
diff --git a/src/hash/hash_open.c b/src/hash/hash_open.c
index 3d0bb220..0104a57f 100644
--- a/src/hash/hash_open.c
+++ b/src/hash/hash_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
@@ -44,6 +44,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/crypto.h"
 #include "dbinc/db_page.h"
 #include "dbinc/hash.h"
@@ -149,6 +150,7 @@ __ham_metachk(dbp, name, hashm)
 	int ret;
 
 	env = dbp->env;
+	ret = 0;
 
 	/*
 	 * At this point, all we know is that the magic number is for a Hash.
@@ -168,6 +170,7 @@ __ham_metachk(dbp, name, hashm)
 	case 7:
 	case 8:
 	case 9:
+	case 10:
 		break;
 	default:
 		__db_errx(env, DB_STR_A("1126",
@@ -230,6 +233,29 @@ __ham_metachk(dbp, name, hashm)
 	/* Set the page size. */
 	dbp->pgsize = hashm->dbmeta.pagesize;
 
+	dbp->blob_threshold = hashm->blob_threshold;
+	GET_BLOB_FILE_ID(env, hashm, dbp->blob_file_id, ret);
+	if (ret != 0)
+		return (ret);
+	GET_BLOB_SDB_ID(env, hashm, dbp->blob_sdb_id, ret);
+	if (ret != 0)
+		return (ret);
+	/* Blob databases must be upgraded. */
+	if (vers == 9 && (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0)) {
+	    __db_errx(env, DB_STR_A("1208",
+"%s: databases that support blobs must be upgraded.", "%s"),
+		    name);
+		return (EINVAL);
+	}
+#ifndef HAVE_64BIT_TYPES
+	if (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0) {
+		__db_errx(env, DB_STR_A("1202",
+		    "%s: blobs require 64 integer compiler support.", "%s"),
+		    name);
+		return (EINVAL);
+	}
+#endif
+
 	/* Copy the file's ID. */
 	memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN);
 
@@ -297,6 +323,9 @@ __ham_init_meta(dbp, meta, pgno, lsnp)
 	meta->nelem = hashp->h_nelem;
 	meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY));
 	memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
+	meta->blob_threshold = dbp->blob_threshold;
+	SET_BLOB_META_FILE_ID(meta, dbp->blob_file_id, HMETA);
+	SET_BLOB_META_SDB_ID(meta, dbp->blob_sdb_id, HMETA);
 
 	if (F_ISSET(dbp, DB_AM_DUP))
 		F_SET(&meta->dbmeta, DB_HASH_DUP);
@@ -414,6 +443,12 @@ __ham_new_file(dbp, ip, txn, fhp, name)
 		    F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
 		pdbt.data = &pginfo;
 		pdbt.size = sizeof(pginfo);
+		if (dbp->blob_threshold) {
+			if ((ret = __blob_generate_dir_ids(
+			    dbp, txn, &dbp->blob_file_id)) != 0)
+				return (ret);
+
+		}
 		if ((ret = __os_calloc(dbp->env, 1, dbp->pgsize, &buf)) != 0)
 			return (ret);
 		meta = (HMETA *)buf;
@@ -491,6 +526,13 @@ __ham_new_subdb(mdbp, dbp, ip, txn)
 	LOCK_INIT(metalock);
 	LOCK_INIT(mmlock);
 
+	if (dbp->blob_threshold) {
+		if ((ret = __blob_generate_dir_ids(
+		    dbp, txn, &dbp->blob_sdb_id)) != 0)
+			return (ret);
+
+	}
+
 	if ((ret = __db_cursor(mdbp, ip, txn,
 	    &dbc, CDB_LOCKING(env) ?  DB_WRITECURSOR : 0)) != 0)
 		return (ret);
diff --git a/src/hash/hash_page.c b/src/hash/hash_page.c
index 7576fe61..8e0f897d 100644
--- a/src/hash/hash_page.c
+++ b/src/hash/hash_page.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1990, 1993, 1994
@@ -129,7 +129,7 @@ recheck:
 		/* Fetch next page. */
 		if (NEXT_PGNO(hcp->page) == PGNO_INVALID) {
 			F_SET(hcp, H_NOMORE);
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		}
 		next_pgno = NEXT_PGNO(hcp->page);
 		hcp->indx = 0;
@@ -344,7 +344,7 @@ __ham_item_prev(dbc, mode, pgnop)
 		if (hcp->pgno == PGNO_INVALID) {
 			/* Beginning of bucket. */
 			F_SET(hcp, H_NOMORE);
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		} else if ((ret =
 		    __ham_next_cpage(dbc, hcp->pgno)) != 0)
 			return (ret);
@@ -371,7 +371,7 @@ __ham_item_prev(dbc, mode, pgnop)
 		if (hcp->indx == 0) {
 			/* Bucket was empty. */
 			F_SET(hcp, H_NOMORE);
-			return (DB_NOTFOUND);
+			return (DBC_ERR(dbc, DB_NOTFOUND));
 		}
 	}
 
@@ -497,7 +497,8 @@ __ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type)
 	inp = P_INP(dbp, p);
 	ksize = (key_type == H_OFFPAGE) ?
 	    key_dbt->size : HKEYDATA_SIZE(key_dbt->size);
-	dsize = (data_type == H_OFFPAGE || data_type == H_OFFDUP) ?
+	dsize = (data_type == H_OFFPAGE ||
+	    data_type == H_OFFDUP || data_type == H_BLOB) ?
 	    data_dbt->size : HKEYDATA_SIZE(data_dbt->size);
 	increase = ksize + dsize;
 
@@ -579,7 +580,8 @@ __ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type)
 	else
 		PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data,
 		    key_dbt->size, key_type);
-	if (data_type == H_OFFPAGE || data_type == H_OFFDUP)
+	if (data_type == H_BLOB ||
+	    data_type == H_OFFPAGE || data_type == H_OFFDUP)
 		memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data,
 		    data_dbt->size);
 	else
@@ -618,6 +620,8 @@ __ham_getindex(dbc, p, key, key_type, match, indx)
 {
 	/* Since all entries are key/data pairs. */
 	DB_ASSERT(dbc->env, NUM_ENT(p)%2 == 0 );
+	/* Blob files can only be stored as data items. */
+	DB_ASSERT(dbc->env, key_type != H_BLOB );
 
 	/* Support pre 4.6 unsorted hash pages. */
 	if (p->type == P_HASH_UNSORTED)
@@ -672,7 +676,7 @@ __ham_getindex_unsorted(dbc, p, key, match, indx)
 				memcpy(&pgno,
 				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
 				if ((ret = __db_moff(dbc, key, pgno, tlen,
-				    t->h_compare, &res)) != 0)
+				    t->h_compare, &res, NULL)) != 0)
 					return (ret);
 			}
 			break;
@@ -681,7 +685,7 @@ __ham_getindex_unsorted(dbc, p, key, match, indx)
 				DB_INIT_DBT(pg_dbt,
 				    HKEYDATA_DATA(hk), key->size);
 				if (t->h_compare(
-				    dbp, key, &pg_dbt) != 0)
+				    dbp, key, &pg_dbt, NULL) != 0)
 					break;
 			} else if (key->size ==
 			    LEN_HKEY(dbp, p, dbp->pgsize, i))
@@ -784,7 +788,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp)
 				(void)__ua_memcpy(&off_pgno,
 				    HOFFPAGE_PGNO(offp), sizeof(db_pgno_t));
 				if ((ret = __db_moff(dbc, key, off_pgno,
-				    itemlen, t->h_compare, &res)) != 0)
+				    itemlen, t->h_compare, &res, NULL)) != 0)
 					return (ret);
 			}
 		} else {
@@ -799,7 +803,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp)
 				(void)__ua_memcpy(&off_len, HOFFPAGE_TLEN(offp),
 				    sizeof(u_int32_t));
 				if ((ret = __db_moff(dbc, &tmp_dbt, off_pgno,
-				    off_len, t->h_compare, &res)) != 0)
+				    off_len, t->h_compare, &res, NULL)) != 0)
 					return (ret);
 				/*
 				 * Since we switched the key/match parameters
@@ -810,7 +814,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp)
 			} else if (t->h_compare != NULL) {
 				/* Case 4, with a user comparison func */
 				DB_INIT_DBT(tmp_dbt, data, itemlen);
-				res = t->h_compare(dbp, key, &tmp_dbt);
+				res = t->h_compare(dbp, key, &tmp_dbt, NULL);
 			} else {
 				/* Case 4, without a user comparison func */
 				if ((res = memcmp(key->data, data,
@@ -899,8 +903,8 @@ __ham_verify_sorted_page (dbc, p)
 			    sizeof(u_int32_t));
 			memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i-2)),
 			    sizeof(db_pgno_t));
-			if ((ret = __db_moff(dbc,
-			    &curr_dbt, tpgno, tlen, t->h_compare, &res)) != 0)
+			if ((ret = __db_moff(dbc, &curr_dbt,
+			    tpgno, tlen, t->h_compare, &res, NULL)) != 0)
 				return (ret);
 		} else if (HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) {
 			memset(&prev_dbt, 0, sizeof(prev_dbt));
@@ -910,8 +914,8 @@ __ham_verify_sorted_page (dbc, p)
 			    sizeof(u_int32_t));
 			memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i)),
 			    sizeof(db_pgno_t));
-			if ((ret = __db_moff(dbc,
-			    &prev_dbt, tpgno, tlen, t->h_compare, &res)) != 0)
+			if ((ret = __db_moff(dbc, &prev_dbt, tpgno, tlen,
+			    t->h_compare, &res, NULL)) != 0)
 				return (ret);
 		} else
 			res = memcmp(prev, curr, min(curr_len, prev_len));
@@ -1047,9 +1051,11 @@ __ham_del_pair(dbc, flags, ppg)
 	DBT data_dbt, key_dbt;
 	DB_LSN new_lsn, *n_lsn, tmp_lsn;
 	DB_MPOOLFILE *mpf;
+	HBLOB hblob;
 	HASH_CURSOR *hcp;
 	PAGE *n_pagep, *nn_pagep, *p, *p_pagep;
 	db_ham_mode op;
+	db_seq_t blob_id;
 	db_indx_t ndx;
 	db_pgno_t chg_pgno, pgno, tmp_pgno;
 	u_int32_t data_type, key_type, order;
@@ -1067,6 +1073,8 @@ __ham_del_pair(dbc, flags, ppg)
 	    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &hcp->page)) != 0)
 		return (ret);
 	p = hcp->page;
+	key_type = HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx));
+	data_type = HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx));
 
 	/*
 	 * We optimize for the normal case which is when neither the key nor
@@ -1075,8 +1083,7 @@ __ham_del_pair(dbc, flags, ppg)
 	 * to remove the big item and then update the page to remove the
 	 * entry referring to the big item.
 	 */
-	if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) &&
-	    HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) {
+	if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && key_type == H_OFFPAGE) {
 		memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))),
 		    sizeof(db_pgno_t));
 		ret = __db_doff(dbc, pgno);
@@ -1084,7 +1091,13 @@ __ham_del_pair(dbc, flags, ppg)
 		ret = 0;
 
 	if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && ret == 0)
-		switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) {
+		switch (data_type) {
+		case H_BLOB:
+			memcpy(&hblob,
+			    P_ENTRY(dbp, p, H_DATAINDEX(ndx)), HBLOB_SIZE);
+			blob_id = (db_seq_t)hblob.id;
+			ret = __blob_del(dbc, blob_id);
+			break;
 		case H_OFFPAGE:
 			memcpy(&pgno,
 			    HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))),
@@ -1111,7 +1124,7 @@ __ham_del_pair(dbc, flags, ppg)
 	/* Now log the delete off this page. */
 	if (DBC_LOGGING(dbc)) {
 		hk = H_PAIRKEY(dbp, hcp->page, ndx);
-		if ((key_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) {
+		if (key_type == H_OFFPAGE) {
 			key_dbt.data = hk;
 			key_dbt.size = HOFFPAGE_SIZE;
 		} else {
@@ -1120,9 +1133,12 @@ __ham_del_pair(dbc, flags, ppg)
 			    LEN_HKEY(dbp, hcp->page, dbp->pgsize, ndx);
 		}
 		hk = H_PAIRDATA(dbp, hcp->page, ndx);
-		if ((data_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) {
+		if (data_type == H_OFFPAGE) {
 			data_dbt.data = hk;
 			data_dbt.size = HOFFPAGE_SIZE;
+		} else if (data_type == H_BLOB) {
+			data_dbt.data = hk;
+			data_dbt.size = HBLOB_SIZE;
 		} else if (data_type == H_OFFDUP) {
 			data_dbt.data = hk;
 			data_dbt.size = HOFFDUP_SIZE;
@@ -1404,6 +1420,8 @@ __ham_replpair(dbc, dbt, newtype)
 	 * unless it is an append, when we extend the offpage item, and
 	 * update the HOFFPAGE item on the current page to have the new size
 	 * via a delete/add.
+	 *
+	 * Updating a record won't cause it to become a blob file or vice versa.
 	 */
 	dbp = dbc->dbp;
 	env = dbp->env;
@@ -2464,15 +2482,18 @@ __ham_add_el(dbc, key, val, type)
 	const DBT *pkey, *pdata;
 	DB *dbp;
 	DBT key_dbt, data_dbt;
-	DB_LSN new_lsn;
+	DB_LSN blob_lsn, new_lsn;
 	DB_MPOOLFILE *mpf;
 	HASH_CURSOR *hcp;
 	HOFFPAGE doff, koff;
+	HBLOB dblob;
 	PAGE *new_pagep;
 	db_pgno_t next_pgno, pgno;
+	off_t file_size;
+	db_seq_t blob_id;
 	u_int32_t data_size, data_type, key_size, key_type;
 	u_int32_t pages, pagespace, pairsize;
-	int do_expand, is_keybig, is_databig, match, ret;
+	int do_expand, is_keybig, match, ret;
 
 	dbp = dbc->dbp;
 	mpf = dbp->mpf;
@@ -2485,14 +2506,33 @@ __ham_add_el(dbc, key, val, type)
 	    dbc->thread_info, dbc->txn, DB_MPOOL_CREATE, &hcp->page)) != 0)
 		return (ret);
 
+	/*
+	 * Key is either:
+	 * - On page
+	 * - On overflow page(s)
+	 */
 	key_size = HKEYDATA_PSIZE(key->size);
-	data_size = HKEYDATA_PSIZE(val->size);
 	is_keybig = ISBIG(hcp, key->size);
-	is_databig = ISBIG(hcp, val->size);
 	if (is_keybig)
 		key_size = HOFFPAGE_PSIZE;
-	if (is_databig)
+	/*
+	 * Data is either:
+	 * - On page (H_KEYDATA or H_DUPLICATE)
+	 * - On overflow page(s)
+	 * - In a blob file
+	 */
+	data_type =
+	    (dbp->blob_threshold && (val->size >= dbp->blob_threshold ||
+	    F_ISSET(val, DB_DBT_BLOB))) ?
+	    H_BLOB : (ISBIG(hcp, val->size)) ? H_OFFPAGE : H_KEYDATA;
+	if (data_type == H_KEYDATA || data_type == H_DUPLICATE)
+		data_size = HKEYDATA_PSIZE(val->size);
+	else if (data_type == H_OFFPAGE)
 		data_size = HOFFPAGE_PSIZE;
+	else { /* H_BLOB */
+		DB_ASSERT(dbp->env, data_type == H_BLOB);
+		data_size = HBLOB_PSIZE;
+	}
 
 	pairsize = key_size + data_size;
 
@@ -2536,17 +2576,17 @@ __ham_add_el(dbc, key, val, type)
 	 * run out of file space before updating the key or data.
 	 */
 	if (dbc->txn == NULL &&
-	    dbp->mpf->mfp->maxpgno != 0 && (is_keybig || is_databig)) {
+	    dbp->mpf->mfp->maxpgno != 0 &&
+	    (is_keybig || data_type == H_OFFPAGE)) {
 		pagespace = P_MAXSPACE(dbp, dbp->pgsize);
 		pages = 0;
-		if (is_databig)
+		if (data_type == H_OFFPAGE)
 			pages = ((data_size - 1) / pagespace) + 1;
-		if (is_keybig) {
+		if (is_keybig)
 			pages += ((key->size - 1) / pagespace) + 1;
-			if (pages >
-			    (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno))
-				return (__db_space_err(dbp));
-		}
+		if (pages >
+		    (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno))
+			return (__db_space_err(dbp));
 	}
 
 	if ((ret = __memp_dirty(mpf,
@@ -2575,7 +2615,7 @@ __ham_add_el(dbc, key, val, type)
 		key_type = H_KEYDATA;
 	}
 
-	if (is_databig) {
+	if (data_type == H_OFFPAGE) {
 		doff.type = H_OFFPAGE;
 		UMRW_SET(doff.unused[0]);
 		UMRW_SET(doff.unused[1]);
@@ -2587,6 +2627,22 @@ __ham_add_el(dbc, key, val, type)
 		data_dbt.size = sizeof(doff);
 		pdata = &data_dbt;
 		data_type = H_OFFPAGE;
+	} else if (data_type == H_BLOB) {
+		memset(&dblob, 0, HBLOB_SIZE);
+		dblob.type = H_BLOB;
+		blob_id = 0;
+		file_size = 0;
+		if ((ret = __blob_put(
+		    dbc, (DBT *)val, &blob_id, &file_size, &blob_lsn)) != 0)
+			return (ret);
+		SET_BLOB_ID(&dblob, blob_id, HBLOB);
+		SET_BLOB_SIZE(&dblob, file_size, HBLOB);
+		SET_BLOB_FILE_ID(&dblob, dbp->blob_file_id, HBLOB);
+		SET_BLOB_SDB_ID(&dblob, dbp->blob_sdb_id, HBLOB);
+		data_dbt.data = &dblob;
+		data_dbt.size = sizeof(dblob);
+		pdata = &data_dbt;
+		data_type = H_BLOB;
 	} else {
 		pdata = val;
 		data_type = type;
@@ -2673,7 +2729,7 @@ __ham_add_el(dbc, key, val, type)
 /*
  * Special insert pair call -- copies a key/data pair from one page to
  * another.  Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
- * H_DUPLICATE, H_OFFDUP).  Since we log splits at a high level, we
+ * H_DUPLICATE, H_OFFDUP, H_BLOB).  Since we log splits at a high level, we
  * do not need to log them here.
  *
  * dest_indx is an optional parameter, it serves several purposes:
@@ -2715,7 +2771,7 @@ __ham_copypair(dbc, src_page, src_ndx, dest_page, dest_indx, log)
 		tkey.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, kindx));
 		tkey.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, kindx);
 	}
-	if (dtype == H_OFFPAGE || dtype == H_OFFDUP) {
+	if (dtype == H_OFFPAGE || dtype == H_OFFDUP || dtype == H_BLOB) {
 		tdata.data = P_ENTRY(dbp, src_page, dindx);
 		tdata.size = LEN_HITEM(dbp, src_page, dbp->pgsize, dindx);
 	} else {
diff --git a/src/hash/hash_rec.c b/src/hash/hash_rec.c
index 58965569..8a39d880 100644
--- a/src/hash/hash_rec.c
+++ b/src/hash/hash_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
@@ -232,6 +232,7 @@ __ham_insdel_42_recover(env, dbtp, lsnp, op, info)
 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
 		ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
 		    H_OFFPAGE : H_KEYDATA;
+		/* TODO: May need a PAIR_ISDATABLOB here. */
 		if (PAIR_ISDATADUP(argp->opcode))
 			dtype = H_DUPLICATE;
 		else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode))
@@ -957,9 +958,8 @@ __ham_metagroup_recover(env, dbtp, lsnp, op, info)
 
 			if (IS_ZERO_LSN(LSN(pagep))) {
 				REC_DIRTY(mpf, ip, dbc->priority, &pagep);
-				P_INIT(pagep, file_dbp->pgsize,
-				    PGNO_INVALID, PGNO_INVALID, PGNO_INVALID,
-				    0, P_HASH);
+ 				P_INIT(pagep, file_dbp->pgsize, pgno,
+ 				    PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
 			}
 			if ((ret =
 			    __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
diff --git a/src/hash/hash_reclaim.c b/src/hash/hash_reclaim.c
index ce3f6d9e..55980444 100644
--- a/src/hash/hash_reclaim.c
+++ b/src/hash/hash_reclaim.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/hash/hash_stat.c b/src/hash/hash_stat.c
index 683ce5a6..7ccf472d 100644
--- a/src/hash/hash_stat.c
+++ b/src/hash/hash_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -188,15 +188,19 @@ __ham_stat_print(dbc, flags)
 	    sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff");
 
 	__db_dl(env,
-	    "Number of overflow pages", (u_long)sp->hash_bigpages);
-	__db_dl_pct(env, "Number of bytes free in overflow pages",
+	    "Number of blobs", (u_long)sp->hash_nblobs);
+	__db_dl(env,
+	    "Number of hash overflow (big item) pages",
+	    (u_long)sp->hash_bigpages);
+	__db_dl_pct(env,
+	    "Number of bytes free in hash overflow (big item) pages",
 	    (u_long)sp->hash_big_bfree, DB_PCT_PG(
 	    sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff");
 
 	__db_dl(env,
 	    "Number of bucket overflow pages", (u_long)sp->hash_overflows);
 	__db_dl_pct(env,
-	    "Number of bytes free in bucket overflow pages",
+	    "Number of bytes free on bucket overflow pages",
 	    (u_long)sp->hash_ovfl_free, DB_PCT_PG(
 	    sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff");
 
@@ -258,6 +262,9 @@ __ham_stat_callback(dbc, pagep, cookie, putp)
 			switch (*H_PAIRDATA(dbp, pagep, indx)) {
 			case H_OFFDUP:
 				break;
+			case H_BLOB:
+				sp->hash_nblobs++;
+				/* fall through */
 			case H_OFFPAGE:
 			case H_KEYDATA:
 				sp->hash_ndata++;
@@ -480,6 +487,7 @@ __ham_traverse(dbc, mode, callback, cookie, look_past_max)
 					    opgno, callback, cookie)) != 0)
 						goto err;
 					break;
+				case H_BLOB:
 				case H_KEYDATA:
 				case H_DUPLICATE:
 					break;
diff --git a/src/hash/hash_stub.c b/src/hash/hash_stub.c
index 57337ea9..89307670 100644
--- a/src/hash/hash_stub.c
+++ b/src/hash/hash_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -127,6 +127,40 @@ __ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
 }
 
 int
+__ham_60_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(flags, 0);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(h, NULL);
+	COMPQUIET(dirtyp, NULL);
+	return (__db_no_hash_am(dbp->env));
+}
+
+int
+__ham_60_hash(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(flags, 0);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(h, NULL);
+	COMPQUIET(dirtyp, NULL);
+	return (__db_no_hash_am(dbp->env));
+}
+
+int
 __hamc_cmp(dbc, other_dbc, result)
 	DBC *dbc, *other_dbc;
 	int *result;
diff --git a/src/hash/hash_upgrade.c b/src/hash/hash_upgrade.c
index f66a7a58..17014a5c 100644
--- a/src/hash/hash_upgrade.c
+++ b/src/hash/hash_upgrade.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/hash.h"
 #include "dbinc/db_upgrade.h"
@@ -321,3 +322,93 @@ __ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp)
 
 	return (ret);
 }
+
+/*
+ * __ham_60_hashmeta--
+ *	Set the hash metadata page version to 10 and flag the page dirty.
+ *
+ * PUBLIC: int __ham_60_hashmeta
+ * PUBLIC:      __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__ham_60_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	HMETA33 *hmeta;
+
+	COMPQUIET(flags, 0);
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(dbp, NULL);
+	hmeta = (HMETA33 *)h;
+	/* Only the version field is modified on the page. */
+	hmeta->dbmeta.version = 10;
+	*dirtyp = 1;
+
+	return (0);
+}
+
+/*
+ * __ham_60_hash --
+ *	Convert each H_BLOB data item on a hash page to the HBLOB60P1 layout.
+ *
+ * PUBLIC: int __ham_60_hash
+ * PUBLIC:      __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__ham_60_hash(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	HBLOB60 hb60;
+	HBLOB60P1 hb60p1;
+	HKEYDATA *hk;
+	db_seq_t blob_id, blob_size, file_id, sdb_id;
+	db_indx_t indx;
+	int ret;
+
+	COMPQUIET(flags, 0);
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(fhp, NULL);
+	ret = 0;
+	/* Items are rewritten in place, so the item sizes must agree. */
+	DB_ASSERT(dbp->env, HBLOB60_SIZE == HBLOB_SIZE);
+	for (indx = 0; indx < NUM_ENT(h); indx += 2) {
+		hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx);
+		if (HPAGE_PTYPE(hk) == H_BLOB) {
+			memcpy(&hb60, hk, HBLOB60_SIZE);
+			memset(&hb60p1, 0, HBLOB_SIZE);
+			hb60p1.type = hb60.type;
+			hb60p1.encoding = hb60.encoding;
+			GET_BLOB60_ID(dbp->env, hb60, blob_id, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_SIZE(dbp->env, hb60, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_FILE_ID(dbp->env, &hb60, file_id, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_SDB_ID(dbp->env, &hb60, sdb_id, ret);
+			if (ret != 0)
+				return (ret);
+			SET_BLOB_ID(&hb60p1, blob_id, HBLOB60P1);
+			SET_BLOB_SIZE(&hb60p1, blob_size, HBLOB60P1);
+			SET_BLOB_FILE_ID(&hb60p1, file_id, HBLOB60P1);
+			SET_BLOB_SDB_ID(&hb60p1, sdb_id, HBLOB60P1);
+			memcpy(hk, &hb60p1, HBLOB_SIZE);
+			*dirtyp = 1;
+		}
+	}
+
+	return (ret);
+}
diff --git a/src/hash/hash_verify.c b/src/hash/hash_verify.c
index 662e7ac8..302d42d8 100644
--- a/src/hash/hash_verify.c
+++ b/src/hash/hash_verify.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_verify.h"
 #include "dbinc/btree.h"
@@ -47,6 +48,7 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
 	int i, ret, t_ret, isbad;
 	u_int32_t pwr, mbucket;
 	u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
+	db_seq_t blob_id;
 
 	env = dbp->env;
 	isbad = 0;
@@ -164,6 +166,55 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
 		}
 	}
 
+	/*
+	 * Check the blob ids stored in the meta page.  Where 64-bit integer
+	 * support is not available, report an error if the file has any blobs.
+	 */
+	t_ret = 0;
+#ifdef HAVE_64BIT_TYPES
+	GET_BLOB_FILE_ID(env, m, blob_id, t_ret);
+	if (t_ret != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1178",
+		    "Page %lu: blob file id overflow.", "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+	t_ret = 0;
+	GET_BLOB_SDB_ID(env, m, blob_id, t_ret);
+	if (t_ret != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1179",
+		    "Page %lu: blob subdatabase id overflow.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+#else /* HAVE_64BIT_TYPES */
+	/*
+	 * db_seq_t is an int without 64-bit types, so this compiles and runs.
+	 */
+	GET_BLOB_FILE_ID(env, m, blob_id, t_ret);
+	if (t_ret != 0 || blob_id != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1203",
+		    "Page %lu: blobs require 64 integer compiler support.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+	t_ret = 0;
+	GET_BLOB_SDB_ID(env, m, blob_id, t_ret);
+	if (t_ret != 0 || blob_id != 0) {
+		isbad = 1;
+		EPRINT((env, DB_STR_A("1204",
+		    "Page %lu: blobs require 64 integer compiler support.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+#endif
+
 err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
 		ret = t_ret;
 	if (LF_ISSET(DB_SALVAGE) &&
@@ -272,12 +323,15 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
 	PAGE *h;
 	u_int32_t i, flags;
 {
+	HBLOB hblob;
 	HOFFDUP hod;
 	HOFFPAGE hop;
 	VRFY_CHILDINFO child;
 	VRFY_PAGEINFO *pip;
 	db_indx_t offset, len, dlen, elen;
 	int ret, t_ret;
+	off_t blob_size;
+	db_seq_t blob_id, file_id, sdb_id;
 	u_int8_t *databuf;
 
 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
@@ -287,6 +341,38 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
 	case H_KEYDATA:
 		/* Nothing to do here--everything but the type field is data */
 		break;
+	case H_BLOB:
+		/*
+		 * Blob item.  Check that the blob file exists and is the same
+		 * file size as is stored in the database record.
+		 */
+		memcpy(&hblob, P_ENTRY(dbp, h, i), HBLOB_SIZE);
+		blob_id = (db_seq_t)hblob.id;
+		GET_BLOB_SIZE(dbp->env, hblob, blob_size, ret);
+		if (ret != 0 || blob_size < 0) {
+			EPRINT((dbp->env, DB_STR_A("1181",
+			    "Page %lu: blob file size value has overflowed",
+			    "%lu"), (u_long)pip->pgno));
+			ret = DB_VERIFY_BAD;
+			goto err;
+		}
+		file_id = (db_seq_t)hblob.file_id;
+		sdb_id = (db_seq_t)hblob.sdb_id;
+		if (file_id == 0 && sdb_id == 0) {
+			EPRINT((dbp->env, DB_STR_A("1184",
+		"Page %lu: invalid blob dir ids %llu %llu at item %lu",
+			    "%lu %llu %llu %lu"),
+			    (u_long)pip->pgno, (unsigned long long)file_id,
+			    (unsigned long long)sdb_id, (u_long)i));
+			ret = DB_VERIFY_BAD;
+			goto err;
+		}
+		if ((ret = __blob_vrfy(dbp->env, blob_id,
+		    blob_size, file_id, sdb_id, pip->pgno, flags)) != 0) {
+			ret = DB_VERIFY_BAD;
+			goto err;
+		}
+		break;
 	case H_DUPLICATE:
 		/* Are we a datum or a key?  Better be the former. */
 		if (i % 2 == 0) {
@@ -822,15 +908,23 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
 	u_int32_t flags;
 {
 	DBT dbt, key_dbt, unkdbt;
+	ENV *env;
+	HBLOB hblob;
+	char *prefix;
 	db_pgno_t dpgno;
 	int ret, err_ret, t_ret;
-	u_int32_t himark, i, ovfl_bufsz;
-	u_int8_t *hk, *p;
+	off_t blob_size, blob_offset, remaining;
+	u_int32_t blob_buf_size, himark, i, ovfl_bufsz;
+	u_int8_t *blob_buf, *hk, *p;
+	db_seq_t blob_id, file_id, sdb_id;
 	void *buf, *key_buf;
 	db_indx_t dlen, len, tlen;
 
 	memset(&dbt, 0, sizeof(DBT));
 	dbt.flags = DB_DBT_REALLOC;
+	blob_buf = NULL;
+	blob_buf_size = 0;
+	env = dbp->env;
 
 	DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
 
@@ -840,9 +934,9 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
 	 * Allocate a buffer for overflow items.  Start at one page;
 	 * __db_safe_goff will realloc as needed.
 	 */
-	if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0)
+	if ((ret = __os_malloc(env, dbp->pgsize, &buf)) != 0)
 		return (ret);
-    ovfl_bufsz = dbp->pgsize;
+	ovfl_bufsz = dbp->pgsize;
 
 	himark = dbp->pgsize;
 	for (i = 0;; i++) {
@@ -886,6 +980,70 @@ keydata:			memcpy(buf, HKEYDATA_DATA(hk), len);
 				    0, " ", handle, callback, 0, 0, vdp)) != 0)
 					err_ret = ret;
 				break;
+			case H_BLOB:
+				memcpy(&hblob, hk, HBLOB_SIZE);
+				blob_id = (db_seq_t)hblob.id;
+				GET_BLOB_SIZE(env, hblob, blob_size, ret);
+				if (ret != 0 || blob_size < 0) {
+					err_ret = DB_VERIFY_BAD;
+					continue;
+				}
+				file_id = (db_seq_t)hblob.file_id;
+				sdb_id = (db_seq_t)hblob.sdb_id;
+				/* Read the blob, in pieces if too large. */
+				blob_offset = 0;
+				if (blob_size > MEGABYTE) {
+					if (blob_buf_size < MEGABYTE) {
+						if ((ret = __os_realloc(
+						    env, MEGABYTE,
+						    &blob_buf)) != 0) {
+							err_ret = ret;
+							continue;
+						}
+						blob_buf_size = MEGABYTE;
+					}
+				} else if (blob_buf_size < blob_size) {
+					if ((ret = __os_realloc(env, (size_t)
+					    blob_size, &blob_buf)) != 0) {
+						err_ret = ret;
+						continue;
+					}
+					blob_buf_size = (u_int32_t)blob_size;
+				}
+				dbt.data = blob_buf;
+				dbt.ulen = blob_buf_size;
+				remaining = blob_size;
+				prefix = " ";
+				do {
+					if ((ret = __blob_salvage(env, blob_id,
+					    blob_offset,
+					    (remaining < blob_buf_size ?
+					    (size_t)remaining : blob_buf_size),
+					    file_id, sdb_id, &dbt)) != 0) {
+						err_ret = DB_VERIFY_BAD;
+						break;
+					}
+					if (remaining > blob_buf_size)
+						F_SET(
+						    vdp, SALVAGE_STREAM_BLOB);
+					else
+						F_CLR(
+						    vdp, SALVAGE_STREAM_BLOB);
+					if ((ret = __db_vrfy_prdbt(
+					    &dbt, 0, prefix, handle,
+					    callback, 0, 0, vdp)) != 0) {
+						err_ret = ret;
+						break;
+					}
+					prefix = NULL;
+					blob_offset += dbt.size;
+					if (remaining < blob_buf_size)
+						remaining = 0;
+					else
+						remaining -= blob_buf_size;
+				} while (remaining > 0);
+				F_CLR(vdp, SALVAGE_STREAM_BLOB);
+				break;
 			case H_OFFPAGE:
 				if (len < HOFFPAGE_SIZE) {
 					err_ret = DB_VERIFY_BAD;
@@ -960,7 +1118,7 @@ keydata:			memcpy(buf, HKEYDATA_DATA(hk), len);
 				 */
 				memset(&key_dbt, 0, sizeof(key_dbt));
 				if ((ret = __os_malloc(
-				    dbp->env, dbt.size, &key_buf)) != 0)
+				    env, dbt.size, &key_buf)) != 0)
 					return (ret);
 				memcpy(key_buf, buf, dbt.size);
 				key_dbt.data = key_buf;
@@ -1002,7 +1160,7 @@ keydata:			memcpy(buf, HKEYDATA_DATA(hk), len);
 					    handle, callback, 0, 0, vdp)) != 0)
 						err_ret = ret;
 				}
-				__os_free(dbp->env, key_buf);
+				__os_free(env, key_buf);
 				break;
 			default:
 				if (!LF_ISSET(DB_AGGRESSIVE))
@@ -1013,7 +1171,9 @@ keydata:			memcpy(buf, HKEYDATA_DATA(hk), len);
 		}
 	}
 
-	__os_free(dbp->env, buf);
+	if (blob_buf != NULL)
+		__os_free(env, blob_buf);
+	__os_free(env, buf);
 	if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
 		return (t_ret);
 	return ((ret == 0 && err_ret != 0) ? err_ret : ret);
@@ -1129,7 +1289,7 @@ __ham_dups_unsorted(dbp, buf, len)
 {
 	DBT a, b;
 	db_indx_t offset, dlen;
-	int (*func) __P((DB *, const DBT *, const DBT *));
+	int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
 
 	memset(&a, 0, sizeof(DBT));
 	memset(&b, 0, sizeof(DBT));
@@ -1146,7 +1306,7 @@ __ham_dups_unsorted(dbp, buf, len)
 		b.data = buf + offset + sizeof(db_indx_t);
 		b.size = dlen;
 
-		if (a.data != NULL && func(dbp, &a, &b) > 0)
+		if (a.data != NULL && func(dbp, &a, &b, NULL) > 0)
 			return (1);
 
 		a.data = b.data;
diff --git a/src/heap/heap.c b/src/heap/heap.c
index ab404658..7aec416b 100644
--- a/src/heap/heap.c
+++ b/src/heap/heap.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -24,6 +24,8 @@ static int  __heapc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
 static int  __heapc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
 static int  __heapc_reloc __P((DBC *, DBT *, DBT *));
 static int  __heapc_reloc_partial __P((DBC *, DBT *, DBT *));
+static void  __heapc_search __P((DBC *, HEAPPG *, db_indx_t,
+		int, db_indx_t *, int *));
 static int  __heapc_split __P((DBC *, DBT *, DBT *, int));
 
 /*
@@ -134,12 +136,15 @@ __heap_bulk(dbc, data, flags)
 	DB_HEAP_RID prev_rid, rid;
 	DBT sdata;
 	HEAP_CURSOR *cp;
+	HEAPBLOBHDR bhdr;
 	HEAPHDR *hdr;
 	HEAPSPLITHDR *shdr;
 	PAGE *pg;
 	db_lockmode_t lock_type;
 	int is_key, ret;
 	int32_t *offp;
+	off_t blob_size;
+	db_seq_t blob_id;
 	u_int32_t data_size, key_size, needed, space;
 	u_int8_t *dbuf, *np;
 
@@ -183,6 +188,7 @@ __heap_bulk(dbc, data, flags)
 next_pg:
 	rid.indx = cp->indx;
 	rid.pgno = cp->pgno;
+	prev_rid = rid;
 	pg = cp->page;
 
 	/*
@@ -213,6 +219,14 @@ next_pg:
 		if (F_ISSET(hdr, HEAP_RECSPLIT)) {
 			shdr = (HEAPSPLITHDR *)hdr;
 			data_size = DB_ALIGN(shdr->tsize, sizeof(u_int32_t));
+		} else if (F_ISSET(hdr, HEAP_RECBLOB)) {
+			memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+			GET_BLOB_SIZE(dbp->env, bhdr, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			if (blob_size > UINT32_MAX)
+				return (DB_BUFFER_SMALL);
+			data_size = (u_int32_t)blob_size;
 		} else
 			data_size = DB_ALIGN(hdr->size, sizeof(u_int32_t));
 		needed += 2 * sizeof(*offp) + data_size;
@@ -250,13 +264,21 @@ next_pg:
 			if ((ret = __heapc_gsplit(
 			    dbc, &sdata, NULL, NULL)) != 0)
 				return (ret);
-		} else {
+		} else if (F_ISSET(hdr, HEAP_RECBLOB)) {
+			memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+			blob_id = (db_seq_t)bhdr.id;
+			if ((ret = __blob_bulk(
+			    dbc, data_size, blob_id, np)) != 0)
+				return (ret);
+		}else {
 			memcpy(np,
 			    (u_int8_t *)hdr + sizeof(HEAPHDR), hdr->size);
 		}
 		*offp-- = (int32_t)(np - dbuf);
 		if (F_ISSET(hdr, HEAP_RECSPLIT))
 			*offp-- = (int32_t)shdr->tsize;
+		else if (F_ISSET(hdr, HEAP_RECBLOB))
+			*offp-- = (int32_t)data_size;
 		else
 			*offp-- = (int32_t)hdr->size;
 		np += data_size;
@@ -296,7 +318,6 @@ __heapc_close(dbc, root_pgno, rmroot)
 	db_pgno_t root_pgno;
 	int *rmroot;
 {
-	DB_MPOOLFILE *mpf;
 	HEAP_CURSOR *cp;
 	int ret;
 
@@ -304,7 +325,6 @@ __heapc_close(dbc, root_pgno, rmroot)
 	COMPQUIET(rmroot, 0);
 
 	cp = (HEAP_CURSOR *)dbc->internal;
-	mpf = dbc->dbp->mpf;
 	ret = 0;
 
 	/* Release the page/lock held by the cursor. */
@@ -325,11 +345,14 @@ __heapc_del(dbc, flags)
 	DB_MPOOLFILE *mpf;
 	DBT hdr_dbt, log_dbt;
 	HEAP *h;
+	HEAPBLOBHDR bhdr;
 	HEAPHDR *hdr;
 	HEAPPG *rpage;
 	HEAP_CURSOR *cp;
 	db_pgno_t region_pgno;
-	int oldspacebits, ret, spacebits, t_ret;
+	int ret, t_ret;
+	db_seq_t blob_id;
+	u_int32_t oldspacebits, spacebits;
 	u_int16_t data_size, size;
 
 	dbp = dbc->dbp;
@@ -337,6 +360,7 @@ __heapc_del(dbc, flags)
 	h = dbp->heap_internal;
 	cp = (HEAP_CURSOR *)dbc->internal;
 	rpage = NULL;
+	ret = 0;
 	COMPQUIET(flags, 0);
 
 	/*
@@ -377,6 +401,14 @@ start:	if (STD_LOCKING(dbc) && (ret = __db_lget(dbc,
 		next_rid.indx = 0;
 	}
 
+	/* Delete the blob file. */
+	if (F_ISSET(hdr, HEAP_RECBLOB)) {
+		memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+		blob_id = (db_seq_t)bhdr.id;
+		if ((ret = __blob_del(dbc, blob_id)) != 0)
+			return (ret);
+	}
+
 	/* Log the deletion. */
 	if (DBC_LOGGING(dbc)) {
 		hdr_dbt.data = hdr;
@@ -384,8 +416,9 @@ start:	if (STD_LOCKING(dbc) && (ret = __db_lget(dbc,
 		log_dbt.data = (u_int8_t *)hdr + hdr_dbt.size;
 		log_dbt.size = data_size;
 		if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page),
-		    0, DB_REM_HEAP, cp->pgno, (u_int32_t)cp->indx,
-		    size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
+		    0, OP_SET(DB_REM_HEAP, cp->page),
+		    cp->pgno, (u_int32_t)cp->indx, size,
+		    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 			goto err;
 	} else
 		LSN_NOT_LOGGED(LSN(cp->page));
@@ -414,7 +447,7 @@ start:	if (STD_LOCKING(dbc) && (ret = __db_lget(dbc,
 		    dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0)
 			goto err;
 		HEAP_SETSPACE(dbp, rpage,
-		    cp->pgno - region_pgno - 1, spacebits);
+		    (cp->pgno - region_pgno) - 1, spacebits);
 	}
 
 err:	DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND);
@@ -443,7 +476,8 @@ err:	DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND);
 
 /*
  * __heap_ditem --
- *   Remove an item from a page.
+ *   Remove an item from a page.  Note that when deleting a blob record, the
+ *   blob file has to be deleted separately from calling this function.
  *
  * PUBLIC: int __heap_ditem
  * PUBLIC:   __P((DBC *, PAGE *, u_int32_t, u_int32_t));
@@ -537,19 +571,21 @@ __heapc_get(dbc, key, data, flags, pgnop)
 	DB_MPOOLFILE *mpf;
 	DB_LOCK meta_lock;
 	DBT tmp_val;
-	HEAP *h;
+	HEAPBLOBHDR bhdr;
 	HEAPHDR *hdr;
 	HEAPMETA *meta;
 	HEAPPG *dpage;
 	HEAP_CURSOR *cp;
 	db_lockmode_t lock_type;
 	db_pgno_t pgno;
-	int cmp, f_indx, found, getpage, indx, ret;
+	int cmp, np_inc, f_indx, found, getpage, indx, ret;
+	off_t blob_size;
+	db_seq_t blob_id;
 
 	dbp = dbc->dbp;
 	mpf = dbp->mpf;
-	h = dbp->heap_internal;
 	cp = (HEAP_CURSOR *)dbc->internal;
+	pgno = PGNO_INVALID;
 	LOCK_INIT(meta_lock);
 	COMPQUIET(pgnop, NULL);
 
@@ -564,7 +600,7 @@ __heapc_get(dbc, key, data, flags, pgnop)
 	else
 		lock_type = DB_LOCK_READ;
 
-	ret = 0;
+	np_inc = ret = 0;
 	found = getpage = FALSE;
 	meta = NULL;
 	dpage = NULL;
@@ -579,7 +615,7 @@ __heapc_get(dbc, key, data, flags, pgnop)
 		ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret);
 		if (ret != 0) {
 			if (ret == DB_PAGE_NOTFOUND)
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
@@ -591,7 +627,7 @@ __heapc_get(dbc, key, data, flags, pgnop)
 		hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, cp->indx);
 		if (F_ISSET(hdr, HEAP_RECSPLIT) &&
 		    !F_ISSET(hdr, HEAP_RECFIRST)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
@@ -610,7 +646,7 @@ first:		pgno = FIRST_HEAP_DPAGE;
 			ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
 			if (ret != 0 ) {
 				if (ret == DB_PAGE_NOTFOUND)
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 			dpage = (HEAPPG *)cp->page;
@@ -620,25 +656,10 @@ first:		pgno = FIRST_HEAP_DPAGE;
 			 * finding first non-split record or first piece of a
 			 * split record, then set up cursor.
 			 */
-			if (TYPE(dpage) == P_HEAP && NUM_ENT(dpage) != 0) {
-				for (indx = 0;
-				     indx <= HEAP_HIGHINDX(dpage); indx++) {
-					if (HEAP_OFFSETTBL(
-					    dbp, dpage)[indx] == 0)
-						continue;
-					hdr = (HEAPHDR *)P_ENTRY(
-					    dbp, dpage, indx);
-					if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
-					    F_ISSET(hdr, HEAP_RECFIRST)) {
-						found = TRUE;
-						cp->pgno = pgno;
-						cp->indx = indx;
-						break;
-					}
-				}
-				if (!found)
-					pgno++;
-			} else
+			__heapc_search(dbc, dpage, 0, 1, &cp->indx, &found);
+			if (found)
+				cp->pgno = pgno;
+			else
 				pgno++;
 		}
 		break;
@@ -668,7 +689,7 @@ last:		pgno = PGNO_BASE_MD;
 		while (!found) {
 			/* Don't look earlier than the first data page. */
 			if (pgno < FIRST_HEAP_DPAGE) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 
@@ -683,33 +704,33 @@ last:		pgno = PGNO_BASE_MD;
 			 * non-split record or the first piece of a split record
 			 * is found.
 			 */
-			if (TYPE(dpage) == P_HEAP && NUM_ENT(dpage) != 0) {
-				for (indx = HEAP_HIGHINDX(dpage);
-				     indx >= 0; indx--) {
-					if (HEAP_OFFSETTBL(
-					    dbp, dpage)[indx] == 0)
-						continue;
-					hdr = (HEAPHDR *)P_ENTRY(
-					    dbp, dpage, indx);
-					if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
-					    F_ISSET(hdr, HEAP_RECFIRST)) {
-						found = TRUE;
-						cp->pgno = pgno;
-						cp->indx = indx;
-						break;
-					}
-				}
-				if (!found)
-					pgno--;
-			} else
+			__heapc_search(dbc,
+			    dpage, HEAP_HIGHINDX(dpage), 1, &cp->indx, &found);
+			if (found)
+				cp->pgno = pgno;
+			else
 				pgno--;
 		}
 		break;
 	case DB_NEXT_NODUP:
 	case DB_NEXT:
-		/* If cursor not initialize, behave as DB_FIRST */
-		if (dbc->internal->pgno == PGNO_INVALID)
-			goto first;
+	case DB_PREV_NODUP:
+	case DB_PREV:
+		/*
+		 * np_inc stores whether to increment or decrement when
+		 * iterating through records on a page and pages in the file.
+		 */
+		if (flags == DB_NEXT_NODUP || flags == DB_NEXT)
+			np_inc = 1;
+		else
+			np_inc = -1;
+		/* If cursor not initialized, behave as DB_FIRST/DB_LAST */
+		if (dbc->internal->pgno == PGNO_INVALID) {
+			if (np_inc == 1)
+				goto first;
+			else
+				goto last;
+		}
 
 		/*
 		 * Acquire the current page with the lock we have already,
@@ -720,108 +741,49 @@ last:		pgno = PGNO_BASE_MD;
 			goto err;
 		dpage = (HEAPPG *)cp->page;
 
-		/* At end of current page, must get next page */
-		if (cp->indx >= HEAP_HIGHINDX(dpage))
+		if (np_inc == 1 && cp->indx >= HEAP_HIGHINDX(dpage))
+			/* At end of current page, must get next page. */
 			getpage = TRUE;
-
-		while (!found) {
-			if (getpage) {
-				pgno = cp->pgno + 1;
-
-				/* Put current page/lock and get next one */
-				ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
-				if (ret != 0) {
-					/* Beyond last page? */
-					if (ret == DB_PAGE_NOTFOUND)
-						ret = DB_NOTFOUND;
-					goto err;
-				}
-				dpage = (HEAPPG *)cp->page;
-
-				/*
-				 * If page is a spam page or its a data
-				 * page without entries, try again.
-				 */
-				if (TYPE(dpage) != P_HEAP ||
-				    (TYPE(dpage) == P_HEAP &&
-				    NUM_ENT(dpage) == 0))
-					continue;
-
-				/* When searching, indx gets bumped to 0 */
-				cp->indx = -1;
-				getpage = FALSE;
-			}
-
+		else if (np_inc == -1) {
 			/*
-			 * Bump index and loop through the offset table finding
-			 * first nonzero entry.  If the offset is for a split
-			 * record, make sure it's the first piece of the split
-			 * record. HEAP_HIGHINDX always points to highest filled
-			 * entry on page.
+			 * Loop through indexes and find first used slot.  Check
+			 * if already at the first slot.
 			 */
-			cp->indx++;
-			for (indx=cp->indx;
-			     indx <= HEAP_HIGHINDX(dpage); indx++) {
-				if (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)
-					continue;
-				hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx);
-				if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
-				    F_ISSET(hdr, HEAP_RECFIRST)) {
-					found = TRUE;
-					cp->indx = indx;
-					break;
-				}
+			for (f_indx=0; (f_indx <= HEAP_HIGHINDX(dpage)) &&
+			    (HEAP_OFFSETTBL(dbp, dpage)[f_indx] == 0); f_indx++)
+			{
+				/* No-op. */
 			}
 
-			/* Nothing of interest on page, so try next */
-			if (!found)
+			/* At the beginning of current page, get new page */
+			if (cp->indx == 0 || cp->indx <= f_indx) {
+				if (cp->pgno == FIRST_HEAP_DPAGE) {
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
+					goto err;
+				}
 				getpage = TRUE;
-		}
-		break;
-	case DB_PREV_NODUP:
-	case DB_PREV:
-		/* If cursor not initialize, behave as DB_LAST */
-		if (dbc->internal->pgno == PGNO_INVALID)
-			goto last;
-
-		/*
-		 * Acquire the current page with the lock we have already,
-		 * unless user has asked for a write lock.
-		 */
-		ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret);
-		if (ret != 0)
-			goto err;
-		dpage = (HEAPPG *)cp->page;
-
-		/*
-		 * Loop through indexes and find first used slot.  Check if
-		 * already at the first slot.
-		 */
-		for (f_indx=0; (f_indx <= HEAP_HIGHINDX(dpage)) &&
-		    (HEAP_OFFSETTBL(dbp, dpage)[f_indx] == 0); f_indx++) ;
-
-		/* At the beginning of current page, must get new page */
-		if (cp->indx == 0 || cp->indx <= f_indx) {
-			if (cp->pgno == FIRST_HEAP_DPAGE) {
-				ret = DB_NOTFOUND;
-				goto err;
 			}
-			getpage = TRUE;
 		}
 
 		while (!found) {
 			if (getpage) {
-				pgno = cp->pgno - 1;
-				/* Do not go past first page */
+				if (np_inc == -1)
+					pgno = cp->pgno - 1;
+				else if (np_inc == 1)
+					pgno = cp->pgno + 1;
 				if (pgno < FIRST_HEAP_DPAGE) {
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 					goto err;
 				}
-				/* Put current page/lock and get prev page. */
+				/* Put current page/lock and get next one */
 				ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
-				if (ret != 0)
+				if (ret != 0) {
+					if (np_inc == 1 &&
+					    ret == DB_PAGE_NOTFOUND)
+						/* Beyond last page */
+						ret = DBC_ERR(dbc, DB_NOTFOUND);
 					goto err;
-
+				}
 				dpage = (HEAPPG *)cp->page;
 
 				/*
@@ -833,31 +795,36 @@ last:		pgno = PGNO_BASE_MD;
 				    NUM_ENT(dpage) == 0))
 					continue;
 
-				/* When search, this gets bumped to high indx */
-				cp->indx = HEAP_HIGHINDX(dpage) + 1;
+				if (np_inc == 1)
+					/*
+					 * When searching, indx gets
+					 * bumped to 0
+					 */
+					cp->indx = UINT16_MAX;
+				else
+					/*
+					 * When searching, indx gets bumped to
+					 * high indx
+					 */
+					cp->indx = HEAP_HIGHINDX(dpage) + 1;
 				getpage = FALSE;
 			}
 
 			/*
-			 * Decrement index and loop through the offset table
-			 * finding previous nonzero entry.
+			 * Bump index and loop through the offset table finding
+			 * first nonzero entry.  If the offset is for a split
+			 * record, make sure it's the first piece of the split
+			 * record. HEAP_HIGHINDX always points to highest filled
+			 * entry on page.
 			 */
-			cp->indx--;
-			for (indx=cp->indx;
-			     indx >= 0; indx--) {
-				if (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)
-					continue;
-				hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx);
-				if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
-				    F_ISSET(hdr, HEAP_RECFIRST)) {
-					found = TRUE;
-					cp->indx = indx;
-					break;
-				}
-			}
-
-			/* Nothing of interest on page, so try previous */
+			if (np_inc == -1)
+				cp->indx--;
+			else if (np_inc == 1)
+				cp->indx++;
+			__heapc_search(dbc,
+			    dpage, cp->indx, np_inc, &cp->indx, &found);
 			if (!found)
+				/* Nothing of interest on page, so try next */
 				getpage = TRUE;
 		}
 		break;
@@ -871,7 +838,7 @@ last:		pgno = PGNO_BASE_MD;
 		/* First make sure we're trying to get a data page. */
 		if (pgno == PGNO_BASE_MD ||
 		    pgno == HEAP_REGION_PGNO(dbp, pgno)) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
@@ -880,7 +847,7 @@ last:		pgno = PGNO_BASE_MD;
 
 		if (ret != 0) {
 			if (ret == DB_PAGE_NOTFOUND)
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		dpage = (HEAPPG *)cp->page;
@@ -889,14 +856,14 @@ last:		pgno = PGNO_BASE_MD;
 		if ((indx >  HEAP_HIGHINDX(dpage)) ||
 		    (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)) {
 			DISCARD(dbc, cp->page, cp->lock, 0, ret);
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx);
 		if (F_ISSET(hdr, HEAP_RECSPLIT) &&
 		    !F_ISSET(hdr, HEAP_RECFIRST)) {
 			DISCARD(dbc, cp->page, cp->lock, 0, ret);
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 
@@ -911,16 +878,30 @@ last:		pgno = PGNO_BASE_MD;
 				if ((ret = __heapc_gsplit(
 				    dbc, &tmp_val, NULL, 0)) != 0)
 					goto err;
+			} else if (F_ISSET(hdr, HEAP_RECBLOB)) {
+				memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+				blob_id = (db_seq_t)bhdr.id;
+				GET_BLOB_SIZE(dbc->env, bhdr, blob_size, ret);
+				if (ret != 0)
+					goto err;
+				if (blob_size > UINT32_MAX) {
+					ret = DB_BUFFER_SMALL;
+					goto err;
+				}
+				tmp_val.flags = DB_DBT_MALLOC;
+				if ((ret = __blob_get(dbc, &tmp_val,
+				    blob_id, blob_size, NULL, 0)) != 0)
+					goto err;
 			} else {
 				tmp_val.data =
 				    (void *)((u_int8_t *)hdr + sizeof(HEAPHDR));
 				tmp_val.size = hdr->size;
 			}
-			cmp = __bam_defcmp(dbp, &tmp_val, data);
+			cmp = __bam_defcmp(dbp, &tmp_val, data, NULL);
 			if (F_ISSET(&tmp_val, DB_DBT_MALLOC))
 				__os_ufree(dbp->env, tmp_val.data);
 			if (cmp != 0) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 		}
@@ -928,7 +909,7 @@ last:		pgno = PGNO_BASE_MD;
 		break;
 	case DB_NEXT_DUP:
 	case DB_PREV_DUP:
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 	default:
 		/* DB_GET_RECNO, DB_JOIN_ITEM, DB_SET_RECNO are invalid */
@@ -959,6 +940,53 @@ err:	if (ret == 0 ) {
 	return (ret);
 }
 
+/*
+ * __heapc_search --
+ *	Walk the slots of dpage, starting at begin and moving by dir (1 or -1),
+ *	until an unsplit record or the first piece of a split record is found.
+ *	Sets *found, and on success stores the slot index in *indxp.
+ */
+static void
+__heapc_search(dbc, dpage, begin, dir, indxp, found)
+	DBC *dbc;
+	HEAPPG *dpage;
+	db_indx_t begin;
+	int dir;
+	db_indx_t *indxp;
+	int *found;
+{
+	DB *dbp;
+	HEAPHDR *rec;
+	db_indx_t slot;
+
+	dbp = dbc->dbp;
+	DB_ASSERT(dbp->env, dir == -1 || dir == 1);
+
+	/* Only heap data pages holding at least one entry can match. */
+	*found = FALSE;
+	if (TYPE(dpage) != P_HEAP || NUM_ENT(dpage) == 0)
+		return;
+
+	for (slot = begin;; slot = (dir == -1) ? slot - 1 : slot + 1) {
+		/* A zero offset-table entry is skipped. */
+		if (HEAP_OFFSETTBL(dbp, dpage)[slot] != 0) {
+			rec = (HEAPHDR *)P_ENTRY(dbp, dpage, slot);
+			/* Later pieces of a split record do not count. */
+			if (!F_ISSET(rec, HEAP_RECSPLIT) ||
+			    F_ISSET(rec, HEAP_RECFIRST)) {
+				*found = TRUE;
+				*indxp = slot;
+				return;
+			}
+		}
+
+		/* Stop at slot 0 going down, or the high index going up. */
+		if ((dir == -1 && slot == 0) ||
+		    (dir == 1 && slot == HEAP_HIGHINDX(dpage)))
+			return;
+	}
+}
+
 #undef	IS_FIRST
 #define	IS_FIRST (last_rid.pgno == PGNO_INVALID)
 /*
@@ -993,6 +1021,7 @@ __heapc_reloc_partial(dbc, key, data)
 
 	/* We only work on partial puts. */
 	DB_ASSERT(dbp->env, F_ISSET(data, DB_DBT_PARTIAL));
+	DB_ASSERT(dbp->env, !F_ISSET(old_hdr, HEAP_RECBLOB));
 
 	/*
 	 * Start by calculating the data_size, total size of the new record, and
@@ -1014,7 +1043,7 @@ __heapc_reloc_partial(dbc, key, data)
 			dlen = old_size - doff;
 		else
 			dlen = data->dlen;
-		data_size = old_size - dlen + data->size;
+		data_size = (old_size - dlen) + data->size;
 	}
 
 	/*
@@ -1075,8 +1104,8 @@ __heapc_reloc_partial(dbc, key, data)
 			 */
 			data_size = doff + (add_bytes ? data->size : 0);
 		else
-			data_size = old_hdr->size -
-				dlen + (add_bytes ? data->size : 0);
+			data_size = (old_hdr->size -
+				dlen) + (add_bytes ? data->size : 0);
 		data_size += remaining;
 
 		if (data_size > buflen) {
@@ -1120,7 +1149,7 @@ __heapc_reloc_partial(dbc, key, data)
 			if (doff + dlen < old_hdr->size) {
 				olddata += dlen;
 				memcpy(buf,
-				    olddata, old_hdr->size - doff - dlen);
+				    olddata, (old_hdr->size - doff) - dlen);
 				dlen = 0;
 			} else
 				/*
@@ -1145,8 +1174,8 @@ __heapc_reloc_partial(dbc, key, data)
 			log_dbt.size = DB_ALIGN(
 			    old_hdr->size, sizeof(u_int32_t));
 			if ((ret = __heap_addrem_log(dbp, dbc->txn,
-			    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
-			    (u_int32_t)cp->indx, old_size,
+			    &LSN(cp->page), 0, OP_SET(DB_REM_HEAP, cp->page),
+			    cp->pgno, (u_int32_t)cp->indx, old_size,
 			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 				goto err;
 		} else
@@ -1185,7 +1214,8 @@ __heapc_reloc_partial(dbc, key, data)
 				log_dbt.size = DB_ALIGN(
 				    old_hdr->size, sizeof(u_int32_t));
 				if ((ret = __heap_addrem_log(dbp, dbc->txn,
-				    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
+				    &LSN(cp->page), 0,
+				    OP_SET(DB_REM_HEAP, cp->page), cp->pgno,
 				    (u_int32_t)cp->indx, old_size,
 				    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 					goto err;
@@ -1197,7 +1227,8 @@ __heapc_reloc_partial(dbc, key, data)
 
 			if (DBC_LOGGING(dbc)) {
 				if ((ret = __heap_addrem_log(dbp, dbc->txn,
-				    &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno,
+				    &LSN(cp->page), 0,
+				    OP_SET(DB_ADD_HEAP, cp->page), cp->pgno,
 				    (u_int32_t)cp->indx, old_size,
 				    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 					goto err;
@@ -1231,7 +1262,7 @@ __heapc_reloc_partial(dbc, key, data)
 			size -= sizeof(db_indx_t);
 		/* Round down to a multiple of 4. */
 		size = DB_ALIGN(
-		    size - sizeof(u_int32_t) + 1, sizeof(u_int32_t));
+		    (size - sizeof(u_int32_t)) + 1, sizeof(u_int32_t));
 		DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR));
 
 		/*
@@ -1261,7 +1292,8 @@ __heapc_reloc_partial(dbc, key, data)
 		if (DBC_LOGGING(dbc)) {
 			if ((ret = __heap_addrem_log(dbp,
 			    dbc->txn, &LSN(cp->page), 0,
-			    DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx,
+			    OP_SET(DB_ADD_HEAP, cp->page), cp->pgno,
+			    (u_int32_t)cp->indx,
 			    size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0)
 				goto err;
 		} else
@@ -1343,7 +1375,8 @@ next_pg:	last_rid.pgno = cp->pgno;
 			log_dbt.size = DB_ALIGN(
 			    old_hdr->size, sizeof(u_int32_t));
 			if ((ret = __heap_addrem_log(dbp, dbc->txn,
-			    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
+			    &LSN(cp->page), 0,
+			    OP_SET(DB_REM_HEAP, cp->page), cp->pgno,
 			    (u_int32_t)cp->indx, old_size,
 			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 				goto err;
@@ -1355,7 +1388,8 @@ next_pg:	last_rid.pgno = cp->pgno;
 
 		if (DBC_LOGGING(dbc)) {
 			if ((ret = __heap_addrem_log(dbp, dbc->txn,
-			    &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno,
+			    &LSN(cp->page), 0,
+			    OP_SET(DB_ADD_HEAP, cp->page), cp->pgno,
 			    (u_int32_t)cp->indx, old_size,
 			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 				goto err;
@@ -1397,6 +1431,8 @@ __heapc_reloc(dbc, key, data)
 	memset(&hdr_dbt, 0, sizeof(DBT));
 	memset(&log_dbt, 0, sizeof(DBT));
 	COMPQUIET(key, NULL);
+	/* Blob database records never change size. */
+	DB_ASSERT(dbp->env, !F_ISSET(old_hdr, HEAP_RECBLOB));
 
 	/*
 	 * We are updating an existing record, which will grow into a split
@@ -1436,7 +1472,8 @@ __heapc_reloc(dbc, key, data)
 			log_dbt.size = DB_ALIGN(
 			    old_hdr->size, sizeof(u_int32_t));
 			if ((ret = __heap_addrem_log(dbp, dbc->txn,
-			    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
+			    &LSN(cp->page), 0,
+			    OP_SET(DB_REM_HEAP, cp->page), cp->pgno,
 			    (u_int32_t)cp->indx, old_size,
 			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 				goto err;
@@ -1471,7 +1508,7 @@ __heapc_reloc(dbc, key, data)
 			size -= sizeof(db_indx_t);
 		/* Round down to a multiple of 4. */
 		size = DB_ALIGN(
-		    size - sizeof(u_int32_t) + 1, sizeof(u_int32_t));
+		    (size - sizeof(u_int32_t)) + 1, sizeof(u_int32_t));
 		DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR));
 		new_hdr.std_hdr.size =
 		    (u_int16_t)(size - sizeof(HEAPSPLITHDR));
@@ -1495,7 +1532,8 @@ __heapc_reloc(dbc, key, data)
 		if (DBC_LOGGING(dbc)) {
 			if ((ret = __heap_addrem_log(dbp,
 			    dbc->txn, &LSN(cp->page), 0,
-			    DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx,
+			    OP_SET(DB_ADD_HEAP, cp->page),
+			    cp->pgno, (u_int32_t)cp->indx,
 			    size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0)
 				goto err;
 		} else
@@ -1565,7 +1603,8 @@ next_pg:	if (next_rid.pgno != PGNO_INVALID) {
 			log_dbt.size = DB_ALIGN(
 			    old_hdr->size, sizeof(u_int32_t));
 			if ((ret = __heap_addrem_log(dbp, dbc->txn,
-			    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
+			    &LSN(cp->page), 0,
+			    OP_SET(DB_REM_HEAP, cp->page), cp->pgno,
 			    (u_int32_t)cp->indx, old_size,
 			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 				goto err;
@@ -1577,7 +1616,8 @@ next_pg:	if (next_rid.pgno != PGNO_INVALID) {
 
 		if (DBC_LOGGING(dbc)) {
 			if ((ret = __heap_addrem_log(dbp, dbc->txn,
-			    &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno,
+			    &LSN(cp->page), 0,
+			    OP_SET(DB_ADD_HEAP, cp->page), cp->pgno,
 			    (u_int32_t)cp->indx,old_size,
 			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 				goto err;
@@ -1608,20 +1648,26 @@ __heapc_put(dbc, key, data, flags, pgnop)
 	DB *dbp;
 	DBT hdr_dbt, log_dbt, new_data;
 	DB_MPOOLFILE *mpf;
+	HEAPBLOBHDR bhdr;
 	HEAPHDR hdr, *old_hdr;
 	HEAP_CURSOR *cp;
 	PAGE *rpage;
 	db_pgno_t region_pgno;
-	int oldspace, ret, space, t_ret;
-	u_int32_t data_size, dlen, new_size, old_flags, old_size, tot_size;
-	u_int8_t *buf, *olddata, *src, *dest;
+	int buf_alloc, ret, t_ret;
+	off_t blob_size;
+	db_seq_t blob_id, new_blob_id;
+	u_int32_t data_size, dlen, new_size, old_flags, old_size;
+	u_int32_t oldspace, space, tot_size;
+	u_int8_t *buf, *olddata;
 
 	dbp = dbc->dbp;
 	mpf = dbp->mpf;
 	cp = (HEAP_CURSOR *)dbc->internal;
 	rpage = NULL;
-	buf = dest = src = NULL;
+	buf = NULL;
+	buf_alloc = 0;
 	dlen = 0;
+	blob_id = new_blob_id = 0;
 
 	if (flags != DB_CURRENT) {
 		/* We're going to write following the get, so use RMW. */
@@ -1668,7 +1714,8 @@ __heapc_put(dbc, key, data, flags, pgnop)
 	    DB_ALIGN(old_hdr->size + HEAP_HDRSIZE(old_hdr), sizeof(u_int32_t));
 	if (old_size < sizeof(HEAPSPLITHDR))
 		old_size = sizeof(HEAPSPLITHDR);
-	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+	/* Partial puts on blobs are dealt with in the blob code. */
+	if (F_ISSET(data, DB_DBT_PARTIAL) && !F_ISSET(old_hdr, HEAP_RECBLOB)) {
 		if (F_ISSET(old_hdr, HEAP_RECSPLIT))
 			tot_size = ((HEAPSPLITHDR *)old_hdr)->tsize;
 		else
@@ -1682,9 +1729,11 @@ __heapc_put(dbc, key, data, flags, pgnop)
 				dlen = tot_size - data->doff;
 			else
 				dlen = data->dlen;
-			data_size = tot_size - dlen + data->size;
+			data_size = (tot_size - dlen) + data->size;
 		}
-	} else
+	} else if (F_ISSET(old_hdr, HEAP_RECBLOB))
+		data_size = HEAPBLOBREC_DSIZE;
+	else
 		data_size = data->size;
 	new_size = DB_ALIGN(data_size + sizeof(HEAPHDR), sizeof(u_int32_t));
 	if (new_size < sizeof(HEAPSPLITHDR))
@@ -1694,6 +1743,8 @@ __heapc_put(dbc, key, data, flags, pgnop)
 	if (F_ISSET(old_hdr, HEAP_RECSPLIT) ||
 	    (new_size > old_size &&
 	    new_size - old_size > HEAP_FREESPACE(dbp, cp->page))) {
+		/* Blob database records never change size. */
+		DB_ASSERT(dbp->env, !F_ISSET(old_hdr, HEAP_RECBLOB));
 		/*
 		 * We've got to split the record, not enough room on the
 		 * page.  Splitting the record will remove old_size bytes and
@@ -1707,13 +1758,14 @@ __heapc_put(dbc, key, data, flags, pgnop)
 
 	memset(&new_data, 0, sizeof(DBT));
 	new_data.size = data_size;
-	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+	if (F_ISSET(data, DB_DBT_PARTIAL) && !F_ISSET(old_hdr, HEAP_RECBLOB)) {
 		/*
 		 * Before replacing the old data, we need to use it to build the
 		 * new data.
 		 */
 		if ((ret = __os_malloc(dbp->env, data_size, &buf)) != 0)
 			goto err;
+		buf_alloc = 1;
 		new_data.data = buf;
 
 		/*
@@ -1736,10 +1788,32 @@ __heapc_put(dbc, key, data, flags, pgnop)
 		buf += data->size;
 
 		/* Fill in remaining data from the old record, skipping dlen. */
-		if (data->doff < old_hdr->size) {
+		if ((data->doff + data->dlen) < old_hdr->size) {
 			olddata += data->doff + data->dlen;
-			memcpy(buf,
-			    olddata, old_hdr->size - data->doff - data->dlen);
+			memcpy(buf, olddata,
+			    (old_hdr->size - data->doff) - data->dlen);
+		}
+	} else if (F_ISSET(old_hdr, HEAP_RECBLOB)) {
+		data_size = HEAPBLOBREC_DSIZE;
+		new_data.size = HEAPBLOBREC_DSIZE;
+		if (F_ISSET(data, DB_DBT_BLOB_REC)) {
+			DB_ASSERT(dbp->env,
+			    F_ISSET(((HEAPHDR *)data->data), HEAP_RECBLOB));
+			new_data.data = HEAPBLOBREC_DATA(data->data);
+		} else {
+			memcpy(&bhdr, old_hdr, HEAPBLOBREC_SIZE);
+			blob_id = (db_seq_t)bhdr.id;
+			GET_BLOB_SIZE(dbp->env, bhdr, blob_size, ret);
+			if (ret != 0)
+				goto err;
+			if ((ret = __blob_repl(dbc,
+			    data, blob_id, &new_blob_id, &blob_size)) != 0)
+				goto err;
+			bhdr.std_hdr.flags = HEAP_RECBLOB;
+			bhdr.std_hdr.size = HEAPBLOBREC_DSIZE;
+			SET_BLOB_SIZE(&bhdr, blob_size, HEAPBLOBHDR);
+			SET_BLOB_ID(&bhdr, new_blob_id, HEAPBLOBHDR);
+			new_data.data = HEAPBLOBREC_DATA(&bhdr);
 		}
 	} else {
 		new_data.data = data->data;
@@ -1751,19 +1825,23 @@ __heapc_put(dbc, key, data, flags, pgnop)
 	 */
 	memset(&hdr, 0, sizeof(HEAPHDR));
 	hdr.size = data_size;
+	if (F_ISSET(old_hdr, HEAP_RECBLOB))
+		hdr.flags = HEAP_RECBLOB;
 	if (DBC_LOGGING(dbc)) {
 		hdr_dbt.data = old_hdr;
 		hdr_dbt.size = HEAP_HDRSIZE(old_hdr);
 		log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size;
 		log_dbt.size = DB_ALIGN(old_hdr->size, sizeof(u_int32_t));
 		if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page),
-		    0, DB_REM_HEAP, cp->pgno, (u_int32_t)cp->indx,
+		    0, OP_SET(DB_REM_HEAP, cp->page), cp->pgno,
+		    (u_int32_t)cp->indx,
 		    old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
 			goto err;
 		hdr_dbt.data = &hdr;
 		hdr_dbt.size = HEAP_HDRSIZE(&hdr);
 		if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page),
-		    0, DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx,
+		    0, OP_SET(DB_ADD_HEAP, cp->page), cp->pgno,
+		    (u_int32_t)cp->indx,
 		    new_size, &hdr_dbt, &new_data, &LSN(cp->page))) != 0)
 			goto err;
 	} else
@@ -1788,14 +1866,14 @@ __heapc_put(dbc, key, data, flags, pgnop)
 		    dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0)
 			goto err;
 
-		HEAP_SETSPACE(dbp, rpage, cp->pgno - region_pgno - 1, space);
+		HEAP_SETSPACE(dbp, rpage, (cp->pgno - region_pgno) - 1, space);
 	}
 
 err:	DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND);
 	if (rpage != NULL && (t_ret = __memp_fput(mpf,
 	    dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0)
 		ret = t_ret;
-	if (F_ISSET(data, DB_DBT_PARTIAL))
+	if (buf_alloc)
 		__os_free(dbp->env, new_data.data);
 
 	if (ret != 0 && LOCK_ISSET(cp->lock))
@@ -1823,18 +1901,21 @@ __heap_getpage(dbc, size, avail)
 	HEAP *h;
 	HEAPPG *rpage;
 	HEAP_CURSOR *cp;
-	db_pgno_t data_pgno, *lkd_pgs, meta_pgno, region_pgno, start_region;
-	int i, lk_mode, max, p, ret, space, start, t_ret;
+	db_pgno_t data_pgno, i, max, meta_pgno, p, region_pgno, start;
+	db_pgno_t start_region;
+	int ret, t_ret;
+	u_int32_t lk_mode, space;
 
 	LOCK_INIT(meta_lock);
+	data_pgno = PGNO_INVALID;
 	dbp = dbc->dbp;
 	mpf = dbp->mpf;
 	cp = (HEAP_CURSOR *)dbc->internal;
 	h = dbp->heap_internal;
 	start_region = region_pgno = h->curregion;
 	max = HEAP_REGION_SIZE(dbp);
-	i = ret = t_ret = 0;
-	lkd_pgs = NULL;
+	i = 0;
+	ret = t_ret = 0;
 
 	/*
 	 * The algorithm for finding a page:
@@ -1897,10 +1978,10 @@ find:	while ((ret = __memp_fget(mpf, &region_pgno,
 		max = h->maxpgno - region_pgno;
 	/*
 	 * Look in the bitmap for a page with sufficient free space.  We use i
-	 * in a slightly strange way.  Because the 2-bits in the bitmap are only
-	 * an estimate, there is a chance the data won't fit on the page we
-	 * choose.  In that case, we re-start the process and want to be able to
-	 * resume this loop where we left off.
+	 * in a slightly strange way.  Because the 2-bits in the bitmap are
+	 * only an estimate, there is a chance the data won't fit on the page
+	 * we choose.  In that case, we re-start the process and want to be
+	 * able to resume this loop where we left off.
 	 */
 	for (; i < max; i++) {
 		p = start + i;
@@ -1908,7 +1989,7 @@ find:	while ((ret = __memp_fget(mpf, &region_pgno,
 			p -= max;
 		if ((*avail = HEAP_SPACE(dbp, rpage, p)) > space)
 			continue;
-		data_pgno = region_pgno + p + 1;
+		data_pgno = (region_pgno + p) + 1;
 		ACQUIRE_CUR(dbc,
 		    DB_LOCK_WRITE, data_pgno, DB_LOCK_NOWAIT, 0, ret);
 		/*
@@ -2071,7 +2152,7 @@ pg_err:				if (p != 0) {
 		if (ret == DB_LOCK_NOTGRANTED)
 			ret = 0;
 		else if (ret != 0) {
-			/* 
+			/*
 			 * Free up the metadata lock.  If this was an error
 			 * other than a missing region page, bail.
 			 */
@@ -2165,7 +2246,7 @@ check:		if (size + sizeof(db_indx_t) > HEAP_FREESPACE(dbp, cp->page)) {
 		}
 	}
 
-	h->curpgindx = data_pgno - region_pgno - 1;
+	h->curpgindx = (data_pgno - region_pgno) - 1;
 err:	DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND);
 	if (rpage != NULL && (t_ret = __memp_fput(mpf,
 	    dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0)
@@ -2187,26 +2268,40 @@ __heap_append(dbc, key, data)
 	DBT *data, *key;
 {
 	DB *dbp;
-	DBT tmp_dbt;
+	DBT tmp_dbt, data_dbt;
 	DB_HEAP_RID rid;
+	DB_LSN lsn;
 	DB_MPOOLFILE *mpf;
 	HEAPPG *rpage;
+	HEAPBLOBHDR bhdr;
 	HEAPHDR hdr;
 	HEAP_CURSOR *cp;
 	db_indx_t indx;
 	db_pgno_t region_pgno;
-	int ret, space, t_ret;
+	int is_blob, ret, t_ret;
+	off_t blob_size;
+	db_seq_t blob_id;
 	u_int8_t avail;
-	u_int32_t data_size;
+	u_int32_t data_size, space;
 
 	dbp = dbc->dbp;
 	mpf = dbp->mpf;
 	ret = t_ret = 0;
 	rpage = NULL;
 	cp = (HEAP_CURSOR *)dbc->internal;
+	blob_size = 0;
+	blob_id = 0;
+
+	if (dbp->blob_threshold &&
+	    (data->size >= dbp->blob_threshold || F_ISSET(data, DB_DBT_BLOB)))
+		is_blob = 1;
+	else
+		is_blob = 0;
 
 	/* Need data.size + header size, 4-byte aligned. */
-	if (F_ISSET(data, DB_DBT_PARTIAL))
+	if (is_blob)
+		data_size = HEAPBLOBREC_SIZE;
+	else if (F_ISSET(data, DB_DBT_PARTIAL))
 		data_size = DB_ALIGN(data->doff +
 		    data->size + sizeof(HEAPHDR), sizeof(u_int32_t));
 	else
@@ -2222,24 +2317,42 @@ __heap_append(dbc, key, data)
 		goto err;
 
 	indx = HEAP_FREEINDX(cp->page);
-	memset(&hdr, 0, sizeof(HEAPHDR));
-	hdr.size = data->size;
-	if (F_ISSET(data, DB_DBT_PARTIAL))
-		hdr.size += data->doff;
-	tmp_dbt.data = &hdr;
-	tmp_dbt.size = sizeof(HEAPHDR);
+	if (is_blob) {
+		if ((ret = __blob_put(
+		    dbc, data, &blob_id, &blob_size, &lsn)) != 0)
+			goto err;
+		memset(&bhdr, 0, HEAPBLOBREC_SIZE);
+		bhdr.std_hdr.flags = HEAP_RECBLOB;
+		bhdr.std_hdr.size = HEAPBLOBREC_DSIZE;
+		SET_BLOB_SIZE(&bhdr, blob_size, HEAPBLOBHDR);
+		SET_BLOB_ID(&bhdr, blob_id, HEAPBLOBHDR);
+		SET_BLOB_FILE_ID(&bhdr, dbp->blob_file_id, HEAPBLOBHDR);
+		tmp_dbt.data = &bhdr;
+		tmp_dbt.size = sizeof(HEAPHDR);
+		memset(&data_dbt, 0, sizeof(DBT));
+		data_dbt.data = HEAPBLOBREC_DATA((&bhdr));
+		data_dbt.size = HEAPBLOBREC_DSIZE;
+	} else {
+		memset(&hdr, 0, sizeof(HEAPHDR));
+		hdr.size = data->size;
+		if (F_ISSET(data, DB_DBT_PARTIAL))
+			hdr.size += data->doff;
+		tmp_dbt.data = &hdr;
+		tmp_dbt.size = sizeof(HEAPHDR);
+		memcpy(&data_dbt, data, sizeof(DBT));
+	}
 
 	/* Log the write. */
 	if (DBC_LOGGING(dbc)) {
 		if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page),
-		    0, DB_ADD_HEAP, cp->pgno, (u_int32_t)indx,
-		    data_size, &tmp_dbt, data, &LSN(cp->page))) != 0)
+		    0, OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, (u_int32_t)indx,
+		    data_size, &tmp_dbt, &data_dbt, &LSN(cp->page))) != 0)
 			goto err;
 	} else
 		LSN_NOT_LOGGED(LSN(cp->page));
 
 	if ((ret = __heap_pitem(
-	    dbc, (PAGE *)cp->page, indx, data_size, &tmp_dbt, data)) != 0)
+	    dbc, (PAGE *)cp->page, indx, data_size, &tmp_dbt, &data_dbt)) != 0)
 		goto err;
 
 	rid.pgno = cp->pgno;
@@ -2256,7 +2369,7 @@ __heap_append(dbc, key, data)
 		    dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0)
 			goto err;
 
-		HEAP_SETSPACE(dbp, rpage, cp->pgno - region_pgno - 1, space);
+		HEAP_SETSPACE(dbp, rpage, (cp->pgno - region_pgno) - 1, space);
 	}
 
 err:	DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND);
@@ -2292,8 +2405,8 @@ __heapc_split(dbc, key, data, is_first)
 	HEAP_CURSOR *cp;
 	db_indx_t indx;
 	db_pgno_t region_pgno;
-	int ret, spacebits, t_ret;
-	u_int32_t buflen, doff, left, size;
+	int ret, t_ret;
+	u_int32_t buflen, doff, left, size, spacebits;
 	u_int8_t availbits, *buf;
 
 	dbp = dbc->dbp;
@@ -2308,7 +2421,6 @@ __heapc_split(dbc, key, data, is_first)
 	ret = t_ret = 0;
 	indx = 0;
 	buf = NULL;
-	buflen = 0;
 
 	/*
 	 * Write the record to multiple pages, in chunks starting from the end.
@@ -2322,6 +2434,9 @@ __heapc_split(dbc, key, data, is_first)
 		left += data->doff;
 	}
 	hdrs.tsize = left;
+	buflen = 1;
+	if ((ret = __os_malloc(dbp->env, buflen, &buf)) != 0)
+		return (ret);
 	while (left > 0) {
 		size = DB_ALIGN(left + sizeof(HEAPSPLITHDR), sizeof(u_int32_t));
 		if (size < sizeof(HEAPSPLITHDR))
@@ -2336,8 +2451,10 @@ __heapc_split(dbc, key, data, is_first)
 		else
 			hdrs.std_hdr.flags |= HEAP_RECFIRST;
 
-		if ((ret = __heap_getpage(dbc, size, &availbits)) != 0)
+		if ((ret = __heap_getpage(dbc, size, &availbits)) != 0) {
+			__os_free(dbp->env, buf);
 			return (ret);
+		}
 
 		/*
 		 * size is the total number of bytes being written to the page.
@@ -2363,7 +2480,7 @@ __heapc_split(dbc, key, data, is_first)
 				size -= sizeof(db_indx_t);
 			/* Round down to a multiple of 4. */
 			size = DB_ALIGN(
-			    size - sizeof(u_int32_t) + 1, sizeof(u_int32_t));
+			    (size - sizeof(u_int32_t)) + 1, sizeof(u_int32_t));
 			DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR));
 			hdrs.std_hdr.size =
 			    (u_int16_t)(size - sizeof(HEAPSPLITHDR));
@@ -2401,10 +2518,10 @@ __heapc_split(dbc, key, data, is_first)
 			 * page minus the bytes we're taking from data.
 			*/
 			t_data.data = buf;
-			memset(buf, '\0', t_data.size - left + doff);
-			buf += t_data.size - left + doff;
+			memset(buf, 0, (t_data.size - left) + doff);
+			buf += (t_data.size - left) + doff;
 			memcpy(buf, data->data, left - doff);
-			doff -= t_data.size - left + doff;
+			doff -= (t_data.size - left) + doff;
 			buf = t_data.data;
 		}
 		hdr_dbt.data = &hdrs;
@@ -2415,7 +2532,8 @@ __heapc_split(dbc, key, data, is_first)
 		if (DBC_LOGGING(dbc)) {
 			if ((ret = __heap_addrem_log(dbp,
 			    dbc->txn, &LSN(cp->page), 0,
-			    DB_ADD_HEAP, cp->pgno, (u_int32_t)indx,
+			    OP_SET(DB_ADD_HEAP, cp->page),
+			    cp->pgno, (u_int32_t)indx,
 			    size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0)
 				goto err;
 		} else
@@ -2447,7 +2565,7 @@ __heapc_split(dbc, key, data, is_first)
 				goto err;
 
 			HEAP_SETSPACE(dbp,
-			    rpage, cp->pgno - region_pgno - 1, spacebits);
+			    rpage, (cp->pgno - region_pgno) - 1, spacebits);
 			ret = __memp_fput(mpf,
 			    dbc->thread_info, rpage, dbc->priority);
 			rpage = NULL;
diff --git a/src/heap/heap.src b/src/heap/heap.src
index 47bd4bb0..a08ad5eb 100644
--- a/src/heap/heap.src
+++ b/src/heap/heap.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -29,7 +29,29 @@ INCLUDE
  * dbt:		data that is to be added or deleted.
  * pagelsn:	former lsn of the page.
  */
-BEGIN addrem		49	151
+BEGIN addrem		61	151
+OP	opcode		u_int32_t	lu
+DB	fileid		int32_t		ld
+ARG	pgno		db_pgno_t	lu
+ARG	indx		u_int32_t	lu
+ARG	nbytes		u_int32_t	lu
+HDR	hdr		DBT		s
+DATA	dbt		DBT		s
+POINTER	pagelsn		DB_LSN *	lu
+END
+
+BEGIN_COMPAT addrem		60	151
+OP	opcode		u_int32_t	lu
+DB	fileid		int32_t		ld
+ARG	pgno		db_pgno_t	lu
+ARG	indx		u_int32_t	lu
+ARG	nbytes		u_int32_t	lu
+HDR	hdr		DBT		s
+DBT	dbt		DBT		s
+POINTER	pagelsn		DB_LSN *	lu
+END
+
+BEGIN_COMPAT addrem		50	151
 ARG	opcode		u_int32_t	lu
 DB	fileid		int32_t		ld
 ARG	pgno		db_pgno_t	lu
diff --git a/src/heap/heap_auto.c b/src/heap/heap_auto.c
index 1cb705f4..9fdcce7a 100644
--- a/src/heap/heap_auto.c
+++ b/src/heap/heap_auto.c
@@ -9,16 +9,38 @@
 #include "dbinc/txn.h"
 
 DB_LOG_RECSPEC __heap_addrem_desc[] = {
-	{LOGREC_ARG, SSZ(__heap_addrem_args, opcode), "opcode", "%lu"},
+	{LOGREC_OP, SSZ(__heap_addrem_args, opcode), "opcode", "%lu"},
 	{LOGREC_DB, SSZ(__heap_addrem_args, fileid), "fileid", ""},
 	{LOGREC_ARG, SSZ(__heap_addrem_args, pgno), "pgno", "%lu"},
 	{LOGREC_ARG, SSZ(__heap_addrem_args, indx), "indx", "%lu"},
 	{LOGREC_ARG, SSZ(__heap_addrem_args, nbytes), "nbytes", "%lu"},
-	{LOGREC_DBT, SSZ(__heap_addrem_args, hdr), "hdr", ""},
-	{LOGREC_DBT, SSZ(__heap_addrem_args, dbt), "dbt", ""},
+	{LOGREC_HDR, SSZ(__heap_addrem_args, hdr), "hdr", ""},
+	{LOGREC_DATA, SSZ(__heap_addrem_args, dbt), "dbt", ""},
 	{LOGREC_POINTER, SSZ(__heap_addrem_args, pagelsn), "pagelsn", ""},
 	{LOGREC_Done, 0, "", ""}
 };
+DB_LOG_RECSPEC __heap_addrem_60_desc[] = {
+	{LOGREC_OP, SSZ(__heap_addrem_60_args, opcode), "opcode", "%lu"},
+	{LOGREC_DB, SSZ(__heap_addrem_60_args, fileid), "fileid", ""},
+	{LOGREC_ARG, SSZ(__heap_addrem_60_args, pgno), "pgno", "%lu"},
+	{LOGREC_ARG, SSZ(__heap_addrem_60_args, indx), "indx", "%lu"},
+	{LOGREC_ARG, SSZ(__heap_addrem_60_args, nbytes), "nbytes", "%lu"},
+	{LOGREC_HDR, SSZ(__heap_addrem_60_args, hdr), "hdr", ""},
+	{LOGREC_DBT, SSZ(__heap_addrem_60_args, dbt), "dbt", ""},
+	{LOGREC_POINTER, SSZ(__heap_addrem_60_args, pagelsn), "pagelsn", ""},
+	{LOGREC_Done, 0, "", ""}
+};
+DB_LOG_RECSPEC __heap_addrem_50_desc[] = {
+	{LOGREC_ARG, SSZ(__heap_addrem_50_args, opcode), "opcode", "%lu"},
+	{LOGREC_DB, SSZ(__heap_addrem_50_args, fileid), "fileid", ""},
+	{LOGREC_ARG, SSZ(__heap_addrem_50_args, pgno), "pgno", "%lu"},
+	{LOGREC_ARG, SSZ(__heap_addrem_50_args, indx), "indx", "%lu"},
+	{LOGREC_ARG, SSZ(__heap_addrem_50_args, nbytes), "nbytes", "%lu"},
+	{LOGREC_DBT, SSZ(__heap_addrem_50_args, hdr), "hdr", ""},
+	{LOGREC_DBT, SSZ(__heap_addrem_50_args, dbt), "dbt", ""},
+	{LOGREC_POINTER, SSZ(__heap_addrem_50_args, pagelsn), "pagelsn", ""},
+	{LOGREC_Done, 0, "", ""}
+};
 DB_LOG_RECSPEC __heap_pg_alloc_desc[] = {
 	{LOGREC_DB, SSZ(__heap_pg_alloc_args, fileid), "fileid", ""},
 	{LOGREC_POINTER, SSZ(__heap_pg_alloc_args, meta_lsn), "meta_lsn", ""},
diff --git a/src/heap/heap_autop.c b/src/heap/heap_autop.c
index b767203b..ac08441b 100644
--- a/src/heap/heap_autop.c
+++ b/src/heap/heap_autop.c
@@ -28,6 +28,40 @@ __heap_addrem_print(env, dbtp, lsnp, notused2, info)
 }
 
 /*
+ * PUBLIC: int __heap_addrem_60_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__heap_addrem_60_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__heap_addrem_60", __heap_addrem_60_desc, info));
+}
+
+/*
+ * PUBLIC: int __heap_addrem_50_print __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__heap_addrem_50_print(env, dbtp, lsnp, notused2, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *info;
+{
+	COMPQUIET(notused2, DB_TXN_PRINT);
+
+	return (__log_print_record(env, dbtp, lsnp, "__heap_addrem_50", __heap_addrem_50_desc, info));
+}
+
+/*
  * PUBLIC: int __heap_pg_alloc_print __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
diff --git a/src/heap/heap_backup.c b/src/heap/heap_backup.c
index 4588b0ba..77b0eaaa 100644
--- a/src/heap/heap_backup.c
+++ b/src/heap/heap_backup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/heap/heap_conv.c b/src/heap/heap_conv.c
index 9f432d13..dbf059a4 100644
--- a/src/heap/heap_conv.c
+++ b/src/heap/heap_conv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #include "db_config.h"
@@ -86,7 +86,10 @@ __heap_mswap(env, pg)
 	SWAP32(p);		/* gbytes */
 	SWAP32(p);		/* bytes */
 	SWAP32(p);		/* region_size */
-	p += 92 * sizeof(u_int32_t); /* unused */
+	SWAP32(p);		/* threshold */
+	SWAP32(p);		/* file id lo */
+	SWAP32(p);		/* file id hi */
+	p += 89 * sizeof(u_int32_t); /* unused */
 	SWAP32(p);		/* crypto_magic */
 
 	return (0);
diff --git a/src/heap/heap_method.c b/src/heap/heap_method.c
index f938b5e7..2667f4fe 100644
--- a/src/heap/heap_method.c
+++ b/src/heap/heap_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -49,15 +49,11 @@ __heap_db_close(dbp)
 	DB *dbp;
 {
 	HEAP *h;
-	int ret;
-
-	ret = 0;
-	if ((h = dbp->heap_internal) == NULL)
-		return (0);
-
-	__os_free(dbp->env, h);
-	dbp->heap_internal = NULL;
 
+	if ((h = dbp->heap_internal) != NULL) {
+		__os_free(dbp->env, h);
+		dbp->heap_internal = NULL;
+	}
 	return (0);
 }
 
diff --git a/src/heap/heap_open.c b/src/heap/heap_open.c
index 6827450d..f5bb72ae 100644
--- a/src/heap/heap_open.c
+++ b/src/heap/heap_open.c
@@ -1,19 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/crypto.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_swap.h"
 #include "dbinc/fop.h"
 #include "dbinc/heap.h"
 #include "dbinc/lock.h"
-#include "dbinc/log.h"
 #include "dbinc/mp.h"
 
 static void __heap_init_meta __P((DB *, HEAPMETA *, db_pgno_t, DB_LSN*));
@@ -82,6 +82,7 @@ __heap_metachk(dbp, name, hm)
 
 	env = dbp->env;
 	h = (HEAP *)dbp->heap_internal;
+	ret = 0;
 
 	/*
 	 * At this point, all we know is that the magic number is for a Heap.
@@ -92,6 +93,7 @@ __heap_metachk(dbp, name, hm)
 		M_32_SWAP(vers);
 	switch (vers) {
 	case 1:
+	case 2:
 		break;
 	default:
 		__db_errx(env,
@@ -116,6 +118,26 @@ __heap_metachk(dbp, name, hm)
 	/* Set the page size. */
 	dbp->pgsize = hm->dbmeta.pagesize;
 
+	dbp->blob_threshold = hm->blob_threshold;
+	GET_BLOB_FILE_ID(env, hm, dbp->blob_file_id, ret);
+	if (ret != 0)
+		return (ret);
+	/* Version 1 metadata with a blob file id set must be upgraded. */
+	if (vers == 1 && dbp->blob_file_id != 0) {
+		__db_errx(env, DB_STR_A("1209",
+"%s: databases that support blobs must be upgraded.", "%s"),
+		    name);
+		return (EINVAL);
+	}
+#ifndef HAVE_64BIT_TYPES
+	if (dbp->blob_file_id != 0) {
+		__db_errx(env, DB_STR_A("1205",
+		    "%s: blobs require 64 integer compiler support.", "%s"),
+		    name);
+		return (EINVAL);
+	}
+#endif
+
 	/* Copy the file's ID. */
 	memcpy(dbp->fileid, hm->dbmeta.uid, DB_FILE_ID_LEN);
 
@@ -179,7 +201,8 @@ __heap_read_meta(dbp, ip, txn, meta_pgno, flags)
 		h->region_size = meta->region_size;
 
 		if (PGNO(meta) == PGNO_BASE_MD && !F_ISSET(dbp, DB_AM_RECOVER))
-			__memp_set_last_pgno(mpf, meta->dbmeta.last_pgno);
+			(void)__memp_set_last_pgno(
+			    mpf, meta->dbmeta.last_pgno);
 	} else {
 		DB_ASSERT(dbp->env,
 		    IS_RECOVERING(dbp->env) || F_ISSET(dbp, DB_AM_RECOVER));
@@ -285,6 +308,12 @@ __heap_new_file(dbp, ip, txn, fhp, name)
 		pginfo.type = dbp->type;
 		pdbt.data = &pginfo;
 		pdbt.size = sizeof(pginfo);
+		if (dbp->blob_threshold) {
+			if ((ret = __blob_generate_dir_ids(
+			    dbp, txn, &dbp->blob_file_id)) != 0)
+				return (ret);
+
+		}
 		if ((ret = __os_calloc(env, 1, dbp->pgsize, &buf)) != 0)
 			return (ret);
 		meta = (HEAPMETA *)buf;
@@ -394,7 +423,9 @@ done:	if (region != NULL && (t_ret = __memp_fput(mpf,
 	    dbc->thread_info, region, dbc->priority)) != 0 && ret == 0)
 		ret = t_ret;
 
-	ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority);
+	if ((t_ret = __memp_fput(mpf,
+	    dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0)
+		ret = t_ret;
 	if ((t_ret = __TLPUT(dbc, meta_lock)) != 0 && ret == 0)
 		ret = t_ret;
 
@@ -436,4 +467,6 @@ __heap_init_meta(dbp, meta, pgno, lsnp)
 	meta->region_size = h->region_size;
 	meta->nregions = 1;
 	meta->curregion = 1;
+	meta->blob_threshold = dbp->blob_threshold;
+	SET_BLOB_META_FILE_ID(meta, dbp->blob_file_id, HEAPMETA);
 }
diff --git a/src/heap/heap_rec.c b/src/heap/heap_rec.c
index 578a61c4..01803a70 100644
--- a/src/heap/heap_rec.c
+++ b/src/heap/heap_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 #include "db_config.h"
@@ -9,7 +9,6 @@
 #include "db_int.h"
 #include "dbinc/db_page.h"
 #include "dbinc/heap.h"
-#include "dbinc/log.h"
 #include "dbinc/mp.h"
 
 /*
@@ -34,7 +33,8 @@ __heap_addrem_recover(env, dbtp, lsnp, op, info)
 	DB_THREAD_INFO *ip;
 	PAGE *pagep, *regionp;
 	db_pgno_t region_pgno;
-	int cmp_n, cmp_p, modified, oldspace, ret, space;
+	int cmp_n, cmp_p, modified, ret;
+	u_int32_t oldspace, opcode, space;
 
 	ip = ((DB_TXNHEAD *)info)->thread_info;
 	pagep = NULL;
@@ -44,19 +44,20 @@ __heap_addrem_recover(env, dbtp, lsnp, op, info)
 
 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
 	modified = 0;
+	opcode = OP_MODE_GET(argp->opcode);
 	cmp_n = log_compare(lsnp, &LSN(pagep));
 	cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
 
-	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_HEAP) ||
-	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_HEAP)) {
+	if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_HEAP) ||
+	    (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_HEAP)) {
 		/* We are either redo-ing an add or undoing a delete. */
 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
 		if ((ret = __heap_pitem(dbc, pagep,
 		    argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0)
 			goto out;
 		modified = 1;
-	} else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_HEAP) ||
-	    (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_HEAP)) {
+	} else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_HEAP) ||
+	    (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_HEAP)) {
 		/* We are either undoing an add or redo-ing a delete. */
 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
 		if ((ret = __heap_ditem(
@@ -76,11 +77,11 @@ __heap_addrem_recover(env, dbtp, lsnp, op, info)
 		HEAP_CALCSPACEBITS(
 		    file_dbp, HEAP_FREESPACE(file_dbp, pagep), space);
 		oldspace = HEAP_SPACE(file_dbp, regionp,
-		    argp->pgno - region_pgno - 1);
+		    (argp->pgno - region_pgno) - 1);
 		if (space != oldspace) {
 			REC_DIRTY(mpf, ip, dbc->priority, &regionp);
 			HEAP_SETSPACE(file_dbp,
-			    regionp, argp->pgno - region_pgno - 1, space);
+			    regionp, (argp->pgno - region_pgno) - 1, space);
 		}
 		if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0)
 			goto out;
@@ -384,3 +385,200 @@ out:	if (pagep != NULL)
 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
 	REC_CLOSE;
 }
+
+/*
+ * __heap_addrem_60_recover --
+ *	Recovery function for the addrem_60 compatibility log record.
+ *
+ * PUBLIC: int __heap_addrem_60_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_addrem_60_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__heap_addrem_60_args *argp;
+	DB *file_dbp;
+	DBC *dbc;
+	DB_MPOOLFILE *mpf;
+	DB_THREAD_INFO *ip;
+	HEAPBLOBHDR bhdr;
+	HEAPHDR *hhdr;
+	PAGE *pagep, *regionp;
+	db_pgno_t region_pgno;
+	int cmp_n, cmp_p, modified, ret;
+	u_int32_t oldspace, opcode, space;
+	u_int8_t buf[HEAPBLOBREC_SIZE];
+
+	ip = ((DB_TXNHEAD *)info)->thread_info;
+	pagep = NULL;
+	REC_PRINT(__heap_addrem_60_print);
+	REC_INTRO(__heap_addrem_60_read, ip, 1);
+	region_pgno = HEAP_REGION_PGNO(file_dbp, argp->pgno);
+
+	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
+	modified = 0;
+	opcode = OP_MODE_GET(argp->opcode);
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+	if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_HEAP) ||
+	    (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_HEAP)) {
+		hhdr = argp->hdr.data;
+		/*
+		 * In 6.0 heap blob log records were not correctly byte
+		 * swapped, so do the swapping here if the blob file id of the
+		 * database does not match the blob file id stored in the
+		 * record.  Technically byte swapping the blob file id could
+		 * produce the same value, but that would only happen in
+		 * practice if the environment contained over 4 billion blob
+		 * databases.  0 is an invalid blob file id.
+		 */
+		if (F_ISSET(hhdr, HEAP_RECBLOB)) {
+			memcpy(buf + sizeof(HEAPHDR),
+				    argp->dbt.data, HEAPBLOBREC_DSIZE);
+			memcpy(&bhdr, buf, HEAPBLOBREC_SIZE);
+			if ((db_seq_t)bhdr.file_id != dbc->dbp->blob_file_id) {
+				M_64_SWAP(bhdr.id);
+				M_64_SWAP(bhdr.size);
+				M_64_SWAP(bhdr.file_id);
+				DB_ASSERT(env,
+				    (db_seq_t)bhdr.file_id
+				    == dbc->dbp->blob_file_id);
+				memcpy(buf, &bhdr, HEAPBLOBREC_SIZE);
+				memcpy(argp->dbt.data,
+				    buf + sizeof(HEAPHDR), HEAPBLOBREC_DSIZE);
+			}
+		}
+		/* Redo of an add or undo of a delete: call __heap_pitem. */
+		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
+		if ((ret = __heap_pitem(dbc, pagep,
+		    argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0)
+			goto out;
+		modified = 1;
+	} else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_HEAP) ||
+	    (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_HEAP)) {
+		/* Undo of an add or redo of a delete: call __heap_ditem. */
+		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
+		if ((ret = __heap_ditem(
+		    dbc, pagep, argp->indx, argp->nbytes)) != 0)
+			goto out;
+		modified = 1;
+	}
+
+	if (modified) {
+		REC_FGET(mpf, ip, region_pgno, &regionp, done);
+		if (DB_REDO(op))
+			LSN(pagep) = *lsnp;
+		else
+			LSN(pagep) = argp->pagelsn;
+
+		/* Recompute the page's space bits; rewrite only on change. */
+		HEAP_CALCSPACEBITS(
+		    file_dbp, HEAP_FREESPACE(file_dbp, pagep), space);
+		oldspace = HEAP_SPACE(file_dbp, regionp,
+		    (argp->pgno - region_pgno) - 1);
+		if (space != oldspace) {
+			REC_DIRTY(mpf, ip, dbc->priority, &regionp);
+			HEAP_SETSPACE(file_dbp,
+			    regionp, (argp->pgno - region_pgno) - 1, space);
+		}
+		if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0)
+			goto out;
+	}
+
+done:	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+out:	if (pagep != NULL)
+		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
+	REC_CLOSE;
+
+}
+
+/*
+ * __heap_addrem_50_recover --
+ *	Recovery function for the addrem_50 compatibility log record.
+ *
+ * PUBLIC: int __heap_addrem_50_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_addrem_50_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__heap_addrem_50_args *argp;
+	DB *file_dbp;
+	DBC *dbc;
+	DB_MPOOLFILE *mpf;
+	DB_THREAD_INFO *ip;
+	PAGE *pagep, *regionp;
+	db_pgno_t region_pgno;
+	int cmp_n, cmp_p, modified, ret;
+	u_int32_t oldspace, space;
+
+	ip = ((DB_TXNHEAD *)info)->thread_info;
+	pagep = NULL;
+	REC_PRINT(__heap_addrem_50_print);
+	REC_INTRO(__heap_addrem_50_read, ip, 1);
+	region_pgno = HEAP_REGION_PGNO(file_dbp, argp->pgno);
+
+	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
+	modified = 0;
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_HEAP) ||
+	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_HEAP)) {
+		/* Redo of an add or undo of a delete: call __heap_pitem. */
+		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
+		if ((ret = __heap_pitem(dbc, pagep,
+		    argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0)
+			goto out;
+		modified = 1;
+	} else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_HEAP) ||
+	    (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_HEAP)) {
+		/* Undo of an add or redo of a delete: call __heap_ditem. */
+		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
+		if ((ret = __heap_ditem(
+		    dbc, pagep, argp->indx, argp->nbytes)) != 0)
+			goto out;
+		modified = 1;
+	}
+
+	if (modified) {
+		REC_FGET(mpf, ip, region_pgno, &regionp, done);
+		if (DB_REDO(op))
+			LSN(pagep) = *lsnp;
+		else
+			LSN(pagep) = argp->pagelsn;
+
+		/* Recompute the page's space bits; rewrite only on change. */
+		HEAP_CALCSPACEBITS(
+		    file_dbp, HEAP_FREESPACE(file_dbp, pagep), space);
+		oldspace = HEAP_SPACE(file_dbp,
+		    regionp, (argp->pgno - region_pgno) - 1);
+		if (space != oldspace) {
+			REC_DIRTY(mpf, ip, dbc->priority, &regionp);
+			HEAP_SETSPACE(file_dbp,
+			    regionp, (argp->pgno - region_pgno) - 1, space);
+		}
+		if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0)
+			goto out;
+	}
+
+done:	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+out:	if (pagep != NULL)
+		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
+	REC_CLOSE;
+}
diff --git a/src/heap/heap_reclaim.c b/src/heap/heap_reclaim.c
index 8cedb223..463e40c0 100644
--- a/src/heap/heap_reclaim.c
+++ b/src/heap/heap_reclaim.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -50,7 +50,7 @@ __heap_truncate(dbc, countp)
 		return (ret);
 	if ((ret = __memp_fget(mpf, &pgno,
 	    dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) {
-		__TLPUT(dbc, lock);
+		(void)__TLPUT(dbc, lock);
 		goto err;
 	}
 
diff --git a/src/heap/heap_stat.c b/src/heap/heap_stat.c
index 9f4361a7..13bd36a2 100644
--- a/src/heap/heap_stat.c
+++ b/src/heap/heap_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -29,7 +29,7 @@ __heap_stat(dbc, spp, flags)
 {
 	DB *dbp;
 	DB_HEAP_STAT *sp;
-	DB_LOCK lock, metalock;
+	DB_LOCK metalock;
 	DB_MPOOLFILE *mpf;
 	ENV *env;
 	HEAPMETA *meta;
@@ -41,7 +41,6 @@ __heap_stat(dbc, spp, flags)
 
 	meta = NULL;
 	LOCK_INIT(metalock);
-	LOCK_INIT(lock);
 	mpf = dbp->mpf;
 	sp = NULL;
 	ret = t_ret = write_meta = 0;
@@ -147,6 +146,8 @@ __heap_stat_print(dbc, flags)
 	    "Underlying database page size", (u_long)sp->heap_pagesize);
 	__db_dl(env,
 	    "Number of records in the database", (u_long)sp->heap_nrecs);
+	__db_dl(env,
+	    "Number of blobs in the database", (u_long)sp->heap_nblobs);
 	__db_dl(env, "Number of database pages", (u_long)sp->heap_pagecnt);
 	__db_dl(env, "Number of database regions", (u_long)sp->heap_nregions);
 	__db_dl(env,
@@ -200,11 +201,13 @@ __heap_stat_callback(dbc, h, cookie, putp)
 		 * We can't just use NUM_ENT, otherwise we'd mis-count split
 		 * records.
 		 */
-		for (i = 0; i < NUM_ENT(h); i++) {
+		for (i = 0; i <= HEAP_HIGHINDX(h); i++) {
 			hdr = (HEAPHDR *)P_ENTRY(dbp, h, i);
 			if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
 			    F_ISSET(hdr, HEAP_RECFIRST))
 				sp->heap_nrecs++;
+			if (F_ISSET(hdr, HEAP_RECBLOB))
+				sp->heap_nblobs++;
 		}
 		break;
 	case P_HEAPMETA: /* Fallthrough */
diff --git a/src/heap/heap_stub.c b/src/heap/heap_stub.c
index b4feb2f3..3093abc2 100644
--- a/src/heap/heap_stub.c
+++ b/src/heap/heap_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id:
  */
@@ -35,6 +35,40 @@ __db_no_heap_am(env)
 }
 
 int
+__heap_60_heapmeta(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	COMPQUIET(real_name, NULL);	/* Only dbp is used below. */
+	COMPQUIET(flags, 0);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(h, NULL);
+	COMPQUIET(dirtyp, NULL);
+	return (__db_no_heap_am(dbp->env));	/* Stub: no upgrade done. */
+}
+
+int
+__heap_60_heap(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	COMPQUIET(real_name, NULL);	/* Only dbp is used below. */
+	COMPQUIET(flags, 0);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(h, NULL);
+	COMPQUIET(dirtyp, NULL);
+	return (__db_no_heap_am(dbp->env));	/* Stub: no upgrade done. */
+}
+
+int
 __heap_db_create(dbp)
 	DB *dbp;
 {
diff --git a/src/heap/heap_upgrade.c b/src/heap/heap_upgrade.c
new file mode 100644
index 00000000..35fa78b9
--- /dev/null
+++ b/src/heap/heap_upgrade.c
@@ -0,0 +1,106 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/blob.h"
+#include "dbinc/db_page.h"
+#include "dbinc/heap.h"
+#include "dbinc/db_upgrade.h"
+
+/*
+ * __heap_60_heapmeta --
+ *	Set the heap metadata page version to 2 and mark the page dirty.
+ *
+ * PUBLIC: int __heap_60_heapmeta
+ * PUBLIC:      __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__heap_60_heapmeta(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	HEAPMETA *hmeta;
+
+	COMPQUIET(flags, 0);		/* Only h and dirtyp are used. */
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(dbp, NULL);
+	hmeta = (HEAPMETA *)h;		/* h is treated as a meta page. */
+
+	hmeta->dbmeta.version = 2;
+	*dirtyp = 1;			/* Always reports a change. */
+
+	return (0);
+}
+
+/*
+ * __heap_60_heap --
+ *	Rewrite a heap page's blob headers: HEAPBLOBHDR60 -> HEAPBLOBHDR60P1.
+ *
+ * PUBLIC: int __heap_60_heap
+ * PUBLIC:      __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__heap_60_heap(dbp, real_name, flags, fhp, h, dirtyp)
+	DB *dbp;
+	char *real_name;
+	u_int32_t flags;
+	DB_FH *fhp;
+	PAGE *h;
+	int *dirtyp;
+{
+	HEAPBLOBHDR60 hb60;
+	HEAPBLOBHDR60P1 hb60p1;
+	HEAPHDR *hdr;
+	db_seq_t blob_id, blob_size, file_id;
+	db_indx_t indx, *offtbl;
+	int ret;
+
+	COMPQUIET(flags, 0);
+	COMPQUIET(real_name, NULL);
+	COMPQUIET(fhp, NULL);
+	offtbl = (db_indx_t *)HEAP_OFFSETTBL(dbp, h);
+	ret = 0;
+
+	DB_ASSERT(dbp->env, HEAPBLOBREC60_SIZE == HEAPBLOBREC_SIZE);
+	for (indx = 0; indx <= HEAP_HIGHINDX(h); indx++) {
+		if (offtbl[indx] == 0)	/* Presumably an unused slot. */
+			continue;
+		hdr = (HEAPHDR *)P_ENTRY(dbp, h, indx);
+		if (F_ISSET(hdr, HEAP_RECBLOB)) {
+			memcpy(&hb60, hdr, HEAPBLOBREC60_SIZE);
+			memset(&hb60p1, 0, HEAPBLOBREC_SIZE);
+			hb60p1.std_hdr.flags = hb60.flags;
+			hb60p1.std_hdr.size = hb60.size;
+			hb60p1.encoding = hb60.encoding;
+			hb60p1.lsn = hb60.lsn;
+			GET_BLOB60_ID(dbp->env, hb60, blob_id, ret);
+			if (ret != 0)	/* Earlier slots stay rewritten. */
+				return (ret);
+			GET_BLOB60_SIZE(dbp->env, hb60, blob_size, ret);
+			if (ret != 0)
+				return (ret);
+			GET_BLOB60_FILE_ID(dbp->env, &hb60, file_id, ret);
+			if (ret != 0)
+				return (ret);
+			SET_BLOB_ID(&hb60p1, blob_id, HEAPBLOBHDR60P1);
+			SET_BLOB_SIZE(&hb60p1, blob_size, HEAPBLOBHDR60P1);
+			SET_BLOB_FILE_ID(&hb60p1, file_id, HEAPBLOBHDR60P1);
+			memcpy(hdr, &hb60p1, HEAPBLOBREC_SIZE);
+			*dirtyp = 1;	/* At least one record changed. */
+		}
+	}
+
+	return (ret);
+}
diff --git a/src/heap/heap_verify.c b/src/heap/heap_verify.c
index ea15c28b..7c90caf0 100644
--- a/src/heap/heap_verify.c
+++ b/src/heap/heap_verify.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,13 +9,14 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_verify.h"
 #include "dbinc/heap.h"
 #include "dbinc/lock.h"
 #include "dbinc/mp.h"
 
-static	int __heap_safe_gsplit __P((DB *, VRFY_DBINFO *, PAGE *, db_indx_t,
+static	int __heap_safe_gsplit __P((DB *, VRFY_DBINFO *, PAGE *, unsigned,
    DBT *));
 static	int __heap_verify_offset_cmp __P((const void *, const void *));
 
@@ -37,7 +38,8 @@ __heap_vrfy_meta(dbp, vdp, meta, pgno, flags)
 	HEAP *h;
 	VRFY_PAGEINFO *pip;
 	db_pgno_t last_pgno, max_pgno, npgs;
-	int isbad, ret;
+	int isbad, ret, t_ret;
+	db_seq_t blob_id;
 
 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
 		return (ret);
@@ -97,8 +99,40 @@ __heap_vrfy_meta(dbp, vdp, meta, pgno, flags)
 			    "%lu"), (u_long)pgno));
 			isbad = 1;
 		}
+		h->gbytes = meta->gbytes;
+		h->bytes = meta->bytes;
 	}
 
+	/*
+	 * Check the blob file id.  Where 64-bit integer support is not
+	 * available, report an error if the file has any blobs.
+	 */
+	t_ret = 0;
+#ifdef HAVE_64BIT_TYPES
+	GET_BLOB_FILE_ID(dbp->env, meta, blob_id, t_ret);
+	if (t_ret != 0) {
+		isbad = 1;
+		EPRINT((dbp->env, DB_STR_A("1173",
+		    "Page %lu: blob file id overflow.", "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+#else /* HAVE_64BIT_TYPES */
+	/*
+	 * db_seq_t is an int on systems without 64-bit integer types, so this
+	 * compiles and runs.  Use dbp->env here, as the 64-bit branch does.
+	 */
+	GET_BLOB_FILE_ID(dbp->env, meta, blob_id, t_ret);
+	if (t_ret != 0 || blob_id != 0) {
+		isbad = 1;
+		EPRINT((dbp->env, DB_STR_A("1206",
+		    "Page %lu: blobs require 64 integer compiler support.",
+		    "%lu"), (u_long)pgno));
+		if (ret == 0)
+			ret = t_ret;
+	}
+#endif
+
 err:	if (LF_ISSET(DB_SALVAGE))
 		ret = __db_salvage_markdone(vdp, pgno);
 
@@ -120,12 +154,16 @@ __heap_vrfy(dbp, vdp, h, pgno, flags)
 	db_pgno_t pgno;
 	u_int32_t flags;
 {
+	HEAPBLOBHDR bhdr;
 	HEAPHDR *hdr;
-	int cnt, i, j, ret;
+	int i, j, ret;
+	off_t blob_size;
+	db_seq_t blob_id, file_id;
 	db_indx_t *offsets, *offtbl, end;
+	u_int32_t cnt;
 
 	if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0)
-		goto err;
+		return (ret);
 
 	if (TYPE(h) == P_IHEAP)
 		/* Nothing to verify on a region page. */
@@ -140,7 +178,7 @@ __heap_vrfy(dbp, vdp, h, pgno, flags)
 	/*
 	 * Build a sorted list of all the offsets in the table.  Entries in the
 	 * offset table are not always sorted.  While we're here, check that
-	 * flags are sane.
+	 * flags are sane, and that the blob entries are sane.
 	 */
 	cnt = 0;
 	for (i = 0; i <= HEAP_HIGHINDX(h); i++) {
@@ -164,6 +202,36 @@ __heap_vrfy(dbp, vdp, h, pgno, flags)
 			ret = DB_VERIFY_BAD;
 			goto err;
 		}
+		if (F_ISSET(hdr, HEAP_RECBLOB)) {
+			/*
+			 * Check that the blob file exists and is the same
+			 * file size as is stored in the database record.
+			 */
+			memcpy(&bhdr, hdr, sizeof(HEAPBLOBHDR));
+			blob_id = (db_seq_t)bhdr.id;
+			GET_BLOB_SIZE(dbp->env, bhdr, blob_size, ret);
+			if (ret != 0 || blob_size < 0) {
+				EPRINT((dbp->env, DB_STR_A("1175",
+			"Page %lu: blob file size value has overflowed",
+				    "%lu"), (u_long)pgno));
+				ret = DB_VERIFY_BAD;
+				goto err;
+			}
+			file_id = (db_seq_t)bhdr.file_id;
+			if (file_id == 0) {
+				EPRINT((dbp->env, DB_STR_A("1177",
+			"Page %lu: invalid blob dir id %llu at item %lu",
+				    "%lu %llu, %lu"), (u_long)pgno,
+				    (unsigned long long)file_id, (u_long)i));
+				ret = DB_VERIFY_BAD;
+				goto err;
+			}
+			if ((ret = __blob_vrfy(dbp->env, blob_id,
+			    blob_size, file_id, 0, pgno, flags)) != 0) {
+				ret = DB_VERIFY_BAD;
+				goto err;
+			}
+		}
 
 		offsets[cnt] = offtbl[i];
 		cnt++;
@@ -180,7 +248,7 @@ __heap_vrfy(dbp, vdp, h, pgno, flags)
 	 * record.  We can't use the P_ENTRY macro because we've kept track of
 	 * the offsets, not the indexes.
 	 */
-	for (i = 0; i < cnt - 1; i++) {
+	for (i = 0; i < (int)cnt - 1; i++) {
 		hdr = (HEAPHDR *)((u_int8_t *)h + offsets[i]);
 		end = offsets[i] + HEAP_HDRSIZE(hdr) + hdr->size;
 		if (end > offsets[i+1]) {
@@ -328,12 +396,22 @@ __heap_salvage(dbp, vdp, pgno, h, handle, callback, flags)
 	u_int32_t flags;
 {
 	DBT dbt;
+	ENV *env;
 	HEAPHDR *hdr;
+	HEAPBLOBHDR bhdr;
 	db_indx_t i, *offtbl;
+	char *prefix;
 	int err_ret, ret, t_ret;
+	off_t blob_size, blob_offset, remaining;
+	u_int32_t blob_buf_size;
+	u_int8_t *blob_buf;
+	db_seq_t blob_id, file_id;
 
 	COMPQUIET(flags, 0);
 	memset(&dbt, 0, sizeof(DBT));
+	blob_buf = NULL;
+	blob_buf_size = 0;
+	env = dbp->env;
 
 	offtbl = (db_indx_t *)HEAP_OFFSETTBL(dbp, h);
 	err_ret = ret = t_ret = 0;
@@ -357,9 +435,74 @@ __heap_salvage(dbp, vdp, pgno, h, handle, callback, flags)
 			if (dbt.size > dbp->pgsize * 4)
 				dbt.size = dbp->pgsize * 4;
 			if ((ret =
-			    __os_malloc(dbp->env, dbt.size, &dbt.data)) != 0)
+			    __os_malloc(env, dbt.size, &dbt.data)) != 0)
 				goto err;
-			__heap_safe_gsplit(dbp, vdp, h, i, &dbt);
+			if ((ret = __heap_safe_gsplit
+			    (dbp, vdp, h, i, &dbt)) != 0) {
+				err_ret = ret;
+				__os_free(env, dbt.data);
+				continue;
+			}
+		} else if (F_ISSET(hdr, HEAP_RECBLOB)) {
+			memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+			blob_id = (db_seq_t)bhdr.id;
+			GET_BLOB_SIZE(env, bhdr, blob_size, ret);
+			if (ret != 0 || blob_size < 0)
+				goto err;
+			file_id = (db_seq_t)bhdr.file_id;
+			/* Read the blob, in pieces if it is too large.*/
+			blob_offset = 0;
+			if (blob_size > MEGABYTE) {
+				if (blob_buf_size < MEGABYTE) {
+					if ((ret = __os_realloc(
+					    env,  MEGABYTE, &blob_buf)) != 0)
+						goto err;
+					blob_buf_size = MEGABYTE;
+				}
+			} else if (blob_buf_size < blob_size) {
+				blob_buf_size = (u_int32_t)blob_size;
+				if ((ret = __os_realloc(
+				    env, blob_buf_size, &blob_buf)) != 0)
+					goto err;
+			}
+			dbt.data = blob_buf;
+			dbt.ulen = blob_buf_size;
+			remaining = blob_size;
+			prefix = " ";
+			do {
+				if ((ret = __blob_salvage(env, blob_id,
+				    blob_offset,
+				    ((remaining < blob_buf_size) ?
+				    (size_t)remaining : blob_buf_size),
+				    file_id, 0, &dbt)) != 0) {
+					if (LF_ISSET(DB_AGGRESSIVE)) {
+						ret = DB_VERIFY_BAD;
+						break;
+					}
+					F_CLR(vdp, SALVAGE_STREAM_BLOB);
+					goto err;
+				}
+				if (remaining > blob_buf_size)
+					F_SET(vdp, SALVAGE_STREAM_BLOB);
+				else
+					F_CLR(vdp, SALVAGE_STREAM_BLOB);
+				if ((t_ret = __db_vrfy_prdbt(
+				    &dbt, 0, prefix, handle,
+				    callback, 0, 0, vdp)) != 0) {
+					if (ret == 0)
+						ret = t_ret;
+					F_CLR(vdp, SALVAGE_STREAM_BLOB);
+					goto err;
+				}
+				prefix = NULL;
+				blob_offset += dbt.size;
+				if (remaining < blob_buf_size)
+					remaining = 0;
+				else
+					remaining -= blob_buf_size;
+			} while (remaining > 0);
+			F_CLR(vdp, SALVAGE_STREAM_BLOB);
+			continue;
 		} else {
 			dbt.data = (u_int8_t *)hdr + HEAP_HDRSIZE(hdr);
 			dbt.size = hdr->size;
@@ -369,11 +512,13 @@ __heap_salvage(dbp, vdp, pgno, h, handle, callback, flags)
 		    0, " ", handle, callback, 0, 0, vdp)) != 0)
 			err_ret = ret;
 		if (F_ISSET(hdr, HEAP_RECSPLIT))
-			__os_free(dbp->env, dbt.data);
+			__os_free(env, dbt.data);
 	}
 
 err:	if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
 		return (t_ret);
+	if (blob_buf != NULL)
+		__os_free(env, blob_buf);
 	return ((ret == 0 && err_ret != 0) ? err_ret : ret);
 }
 
@@ -386,7 +531,7 @@ __heap_safe_gsplit(dbp, vdp, h, i, dbt)
      DB *dbp;
      VRFY_DBINFO *vdp;
      PAGE *h;
-     db_indx_t i;
+     unsigned i;
      DBT *dbt;
 {
 	DB_MPOOLFILE *mpf;
@@ -433,7 +578,7 @@ __heap_safe_gsplit(dbp, vdp, h, i, dbt)
 
 err:	if (gotpg && (t_ret = __memp_fput(
 	    mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0)
-		t_ret = ret;
+		ret = t_ret;
 	return (ret);
 }
 
diff --git a/src/hmac/hmac.c b/src/hmac/hmac.c
index 4febfc60..acaca6bc 100644
--- a/src/hmac/hmac.c
+++ b/src/hmac/hmac.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * Some parts of this code originally written by Adam Stubblefield,
  * -- astubble@rice.edu.
diff --git a/src/lock/Design b/src/lock/Design
index f82bc7e8..2a1d1145 100644
--- a/src/lock/Design
+++ b/src/lock/Design
@@ -298,4 +298,4 @@ A: We currently do not support any automatic configuration for FINE_GRAIN
    locking.  When we do, will need to document that atomicity discussion
    listed above (it is bug-report #553).
 
-Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
diff --git a/src/lock/lock.c b/src/lock/lock.c
index e4627734..bcebbe44 100644
--- a/src/lock/lock.c
+++ b/src/lock/lock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -31,8 +31,8 @@ static int __lock_trade __P((ENV *, DB_LOCK *, DB_LOCKER *));
 static int __lock_vec_api __P((ENV *,
 		u_int32_t, u_int32_t,  DB_LOCKREQ *, int, DB_LOCKREQ **));
 
-static const char __db_lock_invalid[] = "%s: Lock is no longer valid";
-static const char __db_locker_invalid[] = "Locker is not valid";
+#define	LOCK_INVALID_ERR DB_STR_A("2056", "%s: Lock is no longer valid", "%s")
+#define	LOCKER_INVALID_ERR DB_STR("2057", "Locker is not valid")
 
 #ifdef DEBUG
 extern void __db_loadme (void);
@@ -111,7 +111,8 @@ __lock_vec(env, sh_locker, flags, list, nlist, elistp)
 	DB_LOCKREQ *list, **elistp;
 {
 	struct __db_lock *lp, *next_lock;
-	DB_LOCK lock; DB_LOCKOBJ *sh_obj;
+	DB_LOCK lock;
+	DB_LOCKOBJ *sh_obj;
 	DB_LOCKREGION *region;
 	DB_LOCKTAB *lt;
 	DBT *objlist, *np;
@@ -200,12 +201,18 @@ __lock_vec(env, sh_locker, flags, list, nlist, elistp)
 				if (writes == 1 ||
 				    lp->mode == DB_LOCK_READ ||
 				    lp->mode == DB_LOCK_READ_UNCOMMITTED) {
-					SH_LIST_REMOVE(lp,
-					    locker_links, __db_lock);
+					/*
+					 * It is safe to look at lp before
+					 * locking because any threads sharing
+					 * this locker must not be in the API
+					 * at the same time.
+					 */
 					sh_obj = SH_OFF_TO_PTR(lp,
 					    lp->obj, DB_LOCKOBJ);
 					ndx = sh_obj->indx;
 					OBJECT_LOCK_NDX(lt, region, ndx);
+					SH_LIST_REMOVE(lp,
+					    locker_links, __db_lock);
 					/*
 					 * We are not letting lock_put_internal
 					 * unlink the lock, so we'll have to
@@ -423,7 +430,7 @@ __lock_get_api(env, locker, flags, obj, lock_mode, lock)
 	region = env->lk_handle->reginfo.primary;
 
 	LOCK_LOCKERS(env, region);
-	ret = __lock_getlocker_int(env->lk_handle, locker, 0, &sh_locker);
+	ret = __lock_getlocker_int(env->lk_handle, locker, 0, NULL, &sh_locker);
 	UNLOCK_LOCKERS(env, region);
 	LOCK_SYSTEM_LOCK(env->lk_handle, region);
 	if (ret == 0)
@@ -979,12 +986,21 @@ in_abort:	newl->status = DB_LSTAT_WAITING;
 			goto err;
 		}
 
+		/*
+		 * Sleep until someone releases a lock which might let us in.
+		 * Since we want to set the thread state back to ACTIVE, don't
+		 * use the normal MUTEX_LOCK() macro, which would immediately
+		 * return a panic error code. Instead, return the panic after
+		 * restoring the thread state.
+		 */
 		PERFMON2(env, lock, suspend, (DBT *) obj, lock_mode);
-		MUTEX_LOCK(env, newl->mtx_lock);
+		ret = __mutex_lock(env, newl->mtx_lock);
 		PERFMON2(env, lock, resume, (DBT *) obj, lock_mode);
 
 		if (ip != NULL)
 			ip->dbth_state = THREAD_ACTIVE;
+		if (ret != 0)
+			return (ret);
 
 		LOCK_SYSTEM_LOCK(lt, region);
 		OBJECT_LOCK_NDX(lt, region, ndx);
@@ -1165,7 +1181,7 @@ __lock_put_nolock(env, lock, runp, flags)
 	lockp = R_ADDR(&lt->reginfo, lock->off);
 	DB_ASSERT(env, lock->gen == lockp->gen);
 	if (lock->gen != lockp->gen) {
-		__db_errx(env, __db_lock_invalid, "DB_LOCK->lock_put");
+		__db_errx(env, LOCK_INVALID_ERR, "DB_LOCK->lock_put");
 		LOCK_INIT(*lock);
 		return (EINVAL);
 	}
@@ -1224,7 +1240,7 @@ __lock_downgrade(env, lock, new_mode, flags)
 
 	lockp = R_ADDR(&lt->reginfo, lock->off);
 	if (lock->gen != lockp->gen) {
-		__db_errx(env, __db_lock_invalid, "lock_downgrade");
+		__db_errx(env, LOCK_INVALID_ERR, "lock_downgrade");
 		ret = EINVAL;
 		goto out;
 	}
@@ -1662,7 +1678,7 @@ __lock_inherit_locks(lt, sh_locker, flags)
 	 * locks, so inheritance is easy!
 	 */
 	if (sh_locker == NULL) {
-		__db_errx(env, __db_locker_invalid);
+		__db_errx(env, LOCKER_INVALID_ERR);
 		return (EINVAL);
 	}
 
@@ -1683,11 +1699,15 @@ __lock_inherit_locks(lt, sh_locker, flags)
 	for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
 	    lp != NULL;
 	    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
-		SH_LIST_REMOVE(lp, locker_links, __db_lock);
-
-		/* See if the parent already has a lock. */
+		/*
+		 * See if the parent already has a lock. It is safe to look at
+		 * lp before locking it because any threads sharing this locker
+		 * must not be in the API at the same time.
+		 */
 		obj = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ);
 		OBJECT_LOCK_NDX(lt, region, obj->indx);
+		SH_LIST_REMOVE(lp, locker_links, __db_lock);
+
 		SH_TAILQ_FOREACH(hlp, &obj->holders, links, __db_lock)
 			if (hlp->holder == poff && lp->mode == hlp->mode)
 				break;
@@ -1917,7 +1937,7 @@ __lock_trade(env, lock, new_locker)
 
 	/* If the lock is already released, simply return. */
 	if (lp->gen != lock->gen)
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 
 	if (new_locker == NULL) {
 		__db_errx(env, DB_STR("2040", "Locker does not exist"));
diff --git a/src/lock/lock_alloc.incl b/src/lock/lock_alloc.incl
index edea07d2..e10cbcbf 100644
--- a/src/lock/lock_alloc.incl
+++ b/src/lock/lock_alloc.incl
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2012, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/lock/lock_deadlock.c b/src/lock/lock_deadlock.c
index 3c00d7f1..79086687 100644
--- a/src/lock/lock_deadlock.c
+++ b/src/lock/lock_deadlock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -683,38 +683,45 @@ again:		memset(bitmap, 0, count * sizeof(u_int32_t) * nentries);
 	/*
 	 * Now for each locker, record its last lock and set abort status.
 	 * We need to look at the heldby list carefully.  We have the LOCKERS
-	 * locked so they cannot go away.  The lock at the head of the
-	 * list can be removed by locking the object it points at.
-	 * Since lock memory is not freed if we get a lock we can look
-	 * at it safely but SH_LIST_FIRST is not atomic, so we check that
-	 * the list has not gone empty during that macro. We check abort
-	 * status after building the bit maps so that we will not detect
-	 * a blocked transaction without noting that it is already aborting.
+	 * locked so they cannot go away. The LOCK_SYSTEM_LOCK keeps things
+	 * steady when the lock table is not partitioned.  However, if there are
+	 * multiple lock partitions then the head of the heldby list can be
+	 * changed by another thread locking the object it points at.  That
+	 * thread will have OBJECT_LOCK()'d that lock's partition.  We need to
+	 * look at the lock entry in order to determine which partition to
+	 * mutex_lock.  Since lock structs are never really freed, once we get
+	 * the pointer we can look at it safely. However SH_LIST_FIRST is not
+	 * atomic, so we first fetch the pointer and then check that the list
+	 * was not empty during the fetch. This lets us at least mutex_lock the
+	 * partition of the lock. Afterwards, we retry if the lock is no longer
+	 * the first for that locker -- it might have changed to something ELSE
+	 * since then. We check abort status after building the bit maps so that
+	 * we will not pick a blocked transaction without noting that it is
+	 * already aborting.
 	 */
 	for (id = 0; id < count; id++) {
 		if (!id_array[id].valid)
 			continue;
-		if ((ret = __lock_getlocker_int(lt,
-		    id_array[id].id, 0, &lockerp)) != 0 || lockerp == NULL)
+		if ((ret = __lock_getlocker_int(lt, id_array[id].id,
+		     0, NULL, &lockerp)) != 0 || lockerp == NULL)
 			continue;
 
 		/*
-		 * If this is a master transaction, try to
-		 * find one of its children's locks first,
-		 * as they are probably more recent.
+		 * If this is a master transaction, try to find one of its
+		 * children's locks first, as they are probably more recent.
 		 */
 		child = SH_LIST_FIRST(&lockerp->child_locker, __db_locker);
 		if (child != NULL) {
 			do {
-c_retry:			lp = SH_LIST_FIRST(&child->heldby, __db_lock);
-				if (SH_LIST_EMPTY(&child->heldby) || lp == NULL)
+c_retry:			lp = SH_LIST_FIRSTP(&child->heldby, __db_lock);
+				if (__SH_LIST_WAS_EMPTY(&child->heldby, lp))
 					goto c_next;
 
 				if (F_ISSET(child, DB_LOCKER_INABORT))
 					id_array[id].in_abort = 1;
 				ndx = lp->indx;
 				OBJECT_LOCK_NDX(lt, region, ndx);
-				if (lp != SH_LIST_FIRST(
+				if (lp != SH_LIST_FIRSTP(
 				    &child->heldby, __db_lock) ||
 				    ndx != lp->indx) {
 					OBJECT_UNLOCK(lt, region, ndx);
@@ -733,11 +740,11 @@ c_next:				child = SH_LIST_NEXT(
 			} while (child != NULL);
 		}
 
-l_retry:	lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
-		if (!SH_LIST_EMPTY(&lockerp->heldby) && lp != NULL) {
+l_retry:	lp = SH_LIST_FIRSTP(&lockerp->heldby, __db_lock);
+		if (!__SH_LIST_WAS_EMPTY(&lockerp->heldby, lp)) {
 			ndx = lp->indx;
 			OBJECT_LOCK_NDX(lt, region, ndx);
-			if (lp != SH_LIST_FIRST(&lockerp->heldby, __db_lock) ||
+			if (lp != SH_LIST_FIRSTP(&lockerp->heldby, __db_lock) ||
 			    lp->indx != ndx) {
 				OBJECT_UNLOCK(lt, region, ndx);
 				goto l_retry;
@@ -869,7 +876,7 @@ __dd_abort(env, info, statusp)
 	 * detecting, return that.
 	 */
 	if ((ret = __lock_getlocker_int(lt,
-	    info->last_locker_id, 0, &lockerp)) != 0)
+	    info->last_locker_id, 0, NULL, &lockerp)) != 0)
 		goto err;
 	if (lockerp == NULL || F_ISSET(lockerp, DB_LOCKER_INABORT)) {
 		*statusp = DB_ALREADY_ABORTED;
diff --git a/src/lock/lock_failchk.c b/src/lock/lock_failchk.c
index 59fb010f..84f757bf 100644
--- a/src/lock/lock_failchk.c
+++ b/src/lock/lock_failchk.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -15,7 +15,7 @@
 /*
  * __lock_failchk --
  *	Check for locks held by dead threads of control and release
- *	read locks.  If any write locks were held by dead non-trasnactional
+ *	read locks.  If any write locks were held by dead non-transactional
  *	lockers then we must abort and run recovery.  Otherwise we release
  *	read locks for lockers owned by dead threads.  Write locks for
  *	dead transactional lockers will be freed when we abort the transaction.
@@ -98,9 +98,8 @@ retry:	LOCK_LOCKERS(env, lrp);
 			/*
 			 * This locker is most likely referenced by a cursor
 			 * which is owned by a dead thread.  Normally the
-			 * cursor would be available for other threads
-			 * but we assume the dead thread will never release
-			 * it.
+			 * cursor would be available for other threads but we
+			 * assume the dead thread will never release it.
 			 */
 			if (lip->id < TXN_MINIMUM &&
 			    (ret = __lock_freelocker(lt, lip)) != 0)
diff --git a/src/lock/lock_id.c b/src/lock/lock_id.c
index 24b545d1..e0dbaa01 100644
--- a/src/lock/lock_id.c
+++ b/src/lock/lock_id.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -17,7 +17,7 @@ static int __lock_freelocker_int
 
 /*
  * __lock_id_pp --
- *	ENV->lock_id pre/post processing.
+ *	DB_ENV->lock_id pre/post processing.
  *
  * PUBLIC: int __lock_id_pp __P((DB_ENV *, u_int32_t *));
  */
@@ -43,7 +43,11 @@ __lock_id_pp(dbenv, idp)
 
 /*
  * __lock_id --
- *	ENV->lock_id.
+ *	Allocate a new lock id as well as a locker struct to hold it. If we wrap
+ *	around then we find the minimum currently in use and make sure we can
+ *	stay below that. This is similar to __txn_begin_int's code to recover
+ *	txn ids.
+ *
  *
  * PUBLIC: int  __lock_id __P((ENV *, u_int32_t *, DB_LOCKER **));
  */
@@ -59,22 +63,15 @@ __lock_id(env, idp, lkp)
 	u_int32_t id, *ids;
 	int nids, ret;
 
-	lk = NULL;
 	lt = env->lk_handle;
 	region = lt->reginfo.primary;
 	id = DB_LOCK_INVALIDID;
-	ret = 0;
-
-	id = DB_LOCK_INVALIDID;
 	lk = NULL;
+	ret = 0;
 
 	LOCK_LOCKERS(env, region);
 
 	/*
-	 * Allocate a new lock id.  If we wrap around then we find the minimum
-	 * currently in use and make sure we can stay below that.  This code is
-	 * similar to code in __txn_begin_int for recovering txn ids.
-	 *
 	 * Our current valid range can span the maximum valid value, so check
 	 * for it and wrap manually.
 	 */
@@ -98,7 +95,7 @@ __lock_id(env, idp, lkp)
 	id = ++region->lock_id;
 
 	/* Allocate a locker for this id. */
-	ret = __lock_getlocker_int(lt, id, 1, &lk);
+	ret = __lock_getlocker_int(lt, id, 1, NULL, &lk);
 
 err:	UNLOCK_LOCKERS(env, region);
 
@@ -165,7 +162,8 @@ __lock_id_free_pp(dbenv, id)
 
 	LOCK_LOCKERS(env, region);
 	if ((ret =
-	     __lock_getlocker_int(env->lk_handle, id, 0, &sh_locker)) == 0) {
+	    __lock_getlocker_int(env->lk_handle,
+	    id, 0, NULL, &sh_locker)) == 0) {
 		if (sh_locker != NULL)
 			ret = __lock_freelocker_int(lt, region, sh_locker, 1);
 		else {
@@ -194,8 +192,10 @@ __lock_id_free(env, sh_locker)
 	ENV *env;
 	DB_LOCKER *sh_locker;
 {
+	DB_LOCKER locker;
 	DB_LOCKREGION *region;
 	DB_LOCKTAB *lt;
+	DB_MSGBUF mb;
 	int ret;
 
 	lt = env->lk_handle;
@@ -203,9 +203,14 @@ __lock_id_free(env, sh_locker)
 	ret = 0;
 
 	if (sh_locker->nlocks != 0) {
-		__db_errx(env, DB_STR("2046",
-		    "Locker still has locks"));
-		ret = EINVAL;
+		locker = *sh_locker;
+		ret = USR_ERR(env, EINVAL);
+		__db_errx(env, DB_STR_A("2046",
+		    "Locker %d still has %d locks", "%d %d"),
+		    locker.id, locker.nlocks );
+		DB_MSGBUF_INIT(&mb);
+		(void)__lock_dump_locker(env, &mb, lt, sh_locker);
+		DB_MSGBUF_FLUSH(env, &mb);
 		goto err;
 	}
 
@@ -243,17 +248,19 @@ __lock_id_set(env, cur_id, max_id)
 }
 
 /*
- * __lock_getlocker --
- *	Get a locker in the locker hash table.  The create parameter
- * indicates if the locker should be created if it doesn't exist in
- * the table.
+ * __lock_getlocker,__lock_getlocker_int --
+ *	Get a locker in the locker hash table.  The create parameter indicates
+ * whether the locker should be created if it doesn't exist in the table. If
+ * there's a matching locker cached in the thread info, use that without
+ * locking.
  *
- * This must be called with the locker mutex lock if create == 1.
+ * The internal version does not check the thread info cache; it must be called
+ * with the locker mutex locked.
  *
  * PUBLIC: int __lock_getlocker __P((DB_LOCKTAB *,
  * PUBLIC:     u_int32_t, int, DB_LOCKER **));
  * PUBLIC: int __lock_getlocker_int __P((DB_LOCKTAB *,
- * PUBLIC:     u_int32_t, int, DB_LOCKER **));
+ * PUBLIC:     u_int32_t, int, DB_THREAD_INFO *, DB_LOCKER **));
  */
 int
 __lock_getlocker(lt, locker, create, retp)
@@ -263,32 +270,47 @@ __lock_getlocker(lt, locker, create, retp)
 	DB_LOCKER **retp;
 {
 	DB_LOCKREGION *region;
+	DB_THREAD_INFO *ip;
 	ENV *env;
 	int ret;
 
 	COMPQUIET(region, NULL);
 	env = lt->env;
 	region = lt->reginfo.primary;
-
+	ENV_GET_THREAD_INFO(env, ip);
+
+	/* Check to see if the locker is already in the thread info */
+	if (ip != NULL && ip->dbth_local_locker != INVALID_ROFF) {
+		*retp = (DB_LOCKER *)
+		    R_ADDR(&lt->reginfo, ip->dbth_local_locker);
+		if ((*retp)->id == locker)  {
+			   DB_ASSERT(env, !F_ISSET(*retp, DB_LOCKER_FREE));
+#ifdef HAVE_STATISTICS
+			    region->stat.st_nlockers_hit++;
+#endif
+			return (0);
+		}
+	}
 	LOCK_LOCKERS(env, region);
-	ret = __lock_getlocker_int(lt, locker, create, retp);
+	ret = __lock_getlocker_int(lt, locker, create, ip, retp);
 	UNLOCK_LOCKERS(env, region);
-
 	return (ret);
 }
 
 int
-__lock_getlocker_int(lt, locker, create, retp)
+__lock_getlocker_int(lt, locker, create, ip, retp)
 	DB_LOCKTAB *lt;
 	u_int32_t locker;
 	int create;
+	DB_THREAD_INFO *ip;
 	DB_LOCKER **retp;
 {
 	DB_LOCKER *sh_locker;
 	DB_LOCKREGION *region;
-	DB_THREAD_INFO *ip;
+#ifdef DIAGNOSTIC
+	DB_THREAD_INFO *diag;
+#endif
 	ENV *env;
-	db_mutex_t mutex;
 	u_int32_t i, indx, nlockers;
 	int ret;
 
@@ -304,59 +326,85 @@ __lock_getlocker_int(lt, locker, create, retp)
 	SH_TAILQ_FOREACH(sh_locker, &lt->locker_tab[indx], links, __db_locker)
 		if (sh_locker->id == locker)
 			break;
+
 	if (sh_locker == NULL && create) {
-		nlockers = 0;
-		/* Create new locker and then insert it into hash table. */
-		if ((ret = __mutex_alloc(env, MTX_LOGICAL_LOCK,
-		    DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_SELF_BLOCK,
-		    &mutex)) != 0)
-			return (ret);
-		else
-			MUTEX_LOCK(env, mutex);
-		if ((sh_locker = SH_TAILQ_FIRST(
-		    &region->free_lockers, __db_locker)) == NULL) {
-			nlockers = region->stat.st_lockers >> 2;
-			/* Just in case. */
-			if (nlockers == 0)
-				nlockers = 1;
-			if (region->stat.st_maxlockers != 0 &&
-			    region->stat.st_maxlockers <
-			    region->stat.st_lockers + nlockers)
-				nlockers = region->stat.st_maxlockers -
-				region->stat.st_lockers;
-			/*
-			 * Don't hold lockers when getting the region,
-			 * we could deadlock.  When creating a locker
-			 * there is no race since the id allocation
-			 * is synchronized.
-			 */
-			UNLOCK_LOCKERS(env, region);
-			LOCK_REGION_LOCK(env);
-			/*
-			 * If the max memory is not sized for max objects,
-			 * allocate as much as possible.
-			 */
-			F_SET(&lt->reginfo, REGION_TRACKED);
-			while (__env_alloc(&lt->reginfo, nlockers *
-			    sizeof(struct __db_locker), &sh_locker) != 0)
-				if ((nlockers >> 1) == 0)
-					break;
-			F_CLR(&lt->reginfo, REGION_TRACKED);
-			LOCK_REGION_UNLOCK(lt->env);
-			LOCK_LOCKERS(env, region);
-			for (i = 0; i < nlockers; i++) {
+		/* Can we reuse a locker struct cached in the thread info? */
+		if (ip != NULL && ip->dbth_local_locker != INVALID_ROFF &&
+		    (sh_locker = (DB_LOCKER*)R_ADDR(&lt->reginfo,
+		    ip->dbth_local_locker))->id == DB_LOCK_INVALIDID) {
+			   DB_ASSERT(env, !F_ISSET(sh_locker, DB_LOCKER_FREE));
+#ifdef HAVE_STATISTICS
+			    region->stat.st_nlockers_reused++;
+#endif
+		} else {
+			/* Create new locker and insert it into hash table. */
+			if ((sh_locker = SH_TAILQ_FIRST(
+			    &region->free_lockers, __db_locker)) == NULL) {
+				nlockers = region->stat.st_lockers >> 2;
+				/* Just in case. */
+				if (nlockers == 0)
+					nlockers = 1;
+				if (region->stat.st_maxlockers != 0 &&
+				    region->stat.st_maxlockers <
+				    region->stat.st_lockers + nlockers)
+					nlockers = region->stat.st_maxlockers -
+					region->stat.st_lockers;
+				/*
+				 * Don't hold lockers when getting the region,
+				 * we could deadlock.  When creating a locker
+				 * there is no race since the id allocation
+				 * is synchronized.
+				 */
+				UNLOCK_LOCKERS(env, region);
+				LOCK_REGION_LOCK(env);
+				/*
+				 * If the max memory is not sized for max
+				 * objects, allocate as much as possible.
+				 */
+				F_SET(&lt->reginfo, REGION_TRACKED);
+				while (__env_alloc(&lt->reginfo, nlockers *
+				    sizeof(struct __db_locker),
+				    &sh_locker) != 0) {
+					nlockers >>= 1;
+					if (nlockers == 0)
+						break;
+				}
+				F_CLR(&lt->reginfo, REGION_TRACKED);
+				LOCK_REGION_UNLOCK(lt->env);
+				LOCK_LOCKERS(env, region);
+				for (i = 0; i < nlockers; i++) {
+					SH_TAILQ_INSERT_HEAD(
+					    &region->free_lockers,
+					    sh_locker, links, __db_locker);
+					sh_locker->mtx_locker = MUTEX_INVALID;
+#ifdef DIAGNOSTIC
+					sh_locker->prev_locker = INVALID_ROFF;
+#endif
+					sh_locker++;
+				}
+				if (nlockers == 0)
+					return (__lock_nomem(env,
+					    "locker entries"));
+				region->stat.st_lockers += nlockers;
+				sh_locker = SH_TAILQ_FIRST(
+				    &region->free_lockers, __db_locker);
+			}
+			SH_TAILQ_REMOVE(
+			    &region->free_lockers,
+			    sh_locker, links, __db_locker);
+		}
+		F_CLR(sh_locker, DB_LOCKER_FREE);
+		if (sh_locker->mtx_locker == MUTEX_INVALID) {
+			if ((ret = __mutex_alloc(env, MTX_LOGICAL_LOCK,
+			    DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_SELF_BLOCK,
+			    &sh_locker->mtx_locker)) != 0) {
 				SH_TAILQ_INSERT_HEAD(&region->free_lockers,
 				    sh_locker, links, __db_locker);
-				sh_locker++;
+				return (ret);
 			}
-			if (nlockers == 0)
-				return (__lock_nomem(env, "locker entries"));
-			region->stat.st_lockers += nlockers;
-			sh_locker = SH_TAILQ_FIRST(
-			    &region->free_lockers, __db_locker);
+			MUTEX_LOCK(env, sh_locker->mtx_locker);
 		}
-		SH_TAILQ_REMOVE(
-		    &region->free_lockers, sh_locker, links, __db_locker);
+
 		++region->nlockers;
 #ifdef HAVE_STATISTICS
 		STAT_PERFMON2(env, lock, nlockers, region->nlockers, locker);
@@ -365,10 +413,10 @@ __lock_getlocker_int(lt, locker, create, retp)
 			    region->stat.st_maxnlockers,
 			    region->nlockers, locker);
 #endif
+
 		sh_locker->id = locker;
 		env->dbenv->thread_id(
 		    env->dbenv, &sh_locker->pid, &sh_locker->tid);
-		sh_locker->mtx_locker = mutex;
 		sh_locker->dd_id = 0;
 		sh_locker->master_locker = INVALID_ROFF;
 		sh_locker->parent_locker = INVALID_ROFF;
@@ -386,10 +434,20 @@ __lock_getlocker_int(lt, locker, create, retp)
 		    &lt->locker_tab[indx], sh_locker, links, __db_locker);
 		SH_TAILQ_INSERT_HEAD(&region->lockers,
 		    sh_locker, ulinks, __db_locker);
-		ENV_GET_THREAD_INFO(env, ip);
+
+		if (ip != NULL && ip->dbth_local_locker == INVALID_ROFF)
+			ip->dbth_local_locker =
+			    R_OFFSET(&lt->reginfo, sh_locker);
 #ifdef DIAGNOSTIC
-		if (ip != NULL)
-			ip->dbth_locker = R_OFFSET(&lt->reginfo, sh_locker);
+		/*
+		 * __db_has_pagelock checks for proper locking by dbth_locker.
+		 */
+		if ((diag = ip) == NULL)
+			ENV_GET_THREAD_INFO(env, diag);
+		if (diag != NULL) {
+			sh_locker->prev_locker = diag->dbth_locker;
+			diag->dbth_locker = R_OFFSET(&lt->reginfo, sh_locker);
+		}
 #endif
 	}
 
@@ -420,7 +478,7 @@ __lock_addfamilylocker(env, pid, id, is_family)
 	LOCK_LOCKERS(env, region);
 
 	/* get/create the  parent locker info */
-	if ((ret = __lock_getlocker_int(lt, pid, 1, &mlockerp)) != 0)
+	if ((ret = __lock_getlocker_int(lt, pid, 1, NULL, &mlockerp)) != 0)
 		goto err;
 
 	/*
@@ -430,7 +488,7 @@ __lock_addfamilylocker(env, pid, id, is_family)
 	 * we manipulate it, nor can another child in the
 	 * family be created at the same time.
 	 */
-	if ((ret = __lock_getlocker_int(lt, id, 1, &lockerp)) != 0)
+	if ((ret = __lock_getlocker_int(lt, id, 1, NULL, &lockerp)) != 0)
 		goto err;
 
 	/* Point to our parent. */
@@ -466,9 +524,9 @@ err:	UNLOCK_LOCKERS(env, region);
 }
 
 /*
- * __lock_freelocker_int
+ * __lock_freelocker_int --
  *      Common code for deleting a locker; must be called with the
- *	locker bucket locked.
+ *	lockers mutex locked.
  */
 static int
 __lock_freelocker_int(lt, region, sh_locker, reallyfree)
@@ -478,15 +536,21 @@ __lock_freelocker_int(lt, region, sh_locker, reallyfree)
 	int reallyfree;
 {
 	ENV *env;
+	DB_MSGBUF mb;
+	DB_THREAD_INFO *ip;
 	u_int32_t indx;
 	int ret;
 
 	env = lt->env;
-
-	if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) != NULL) {
-		__db_errx(env, DB_STR("2047",
-		    "Freeing locker with locks"));
-		return (EINVAL);
+	if (!SH_LIST_EMPTY(&sh_locker->heldby)) {
+		ret = USR_ERR(env, EINVAL);
+		__db_errx(env,
+		    DB_STR("2060", "Freeing locker %x with locks"),
+		    sh_locker->id);
+		DB_MSGBUF_INIT(&mb);
+		(void)__lock_dump_locker(env, &mb, lt, sh_locker);
+		DB_MSGBUF_FLUSH(env, &mb);
+		return (ret);
 	}
 
 	/* If this is part of a family, we must fix up its links. */
@@ -499,16 +563,29 @@ __lock_freelocker_int(lt, region, sh_locker, reallyfree)
 		LOCKER_HASH(lt, region, sh_locker->id, indx);
 		SH_TAILQ_REMOVE(&lt->locker_tab[indx], sh_locker,
 		    links, __db_locker);
-		if (sh_locker->mtx_locker != MUTEX_INVALID &&
-		    (ret = __mutex_free(env, &sh_locker->mtx_locker)) != 0)
-			return (ret);
-		SH_TAILQ_INSERT_HEAD(&region->free_lockers, sh_locker,
-		    links, __db_locker);
 		SH_TAILQ_REMOVE(&region->lockers, sh_locker,
 		    ulinks, __db_locker);
 		region->nlockers--;
 		STAT_PERFMON2(env,
 		    lock, nlockers, region->nlockers, sh_locker->id);
+		/*
+		 * If this locker is cached in the thread info, zero the id and
+		 * leave it allocated. Otherwise, put it back on the free list.
+		 */
+		ENV_GET_THREAD_INFO(env, ip);
+		if (ip != NULL && ip->dbth_local_locker ==
+		    R_OFFSET(&lt->reginfo, sh_locker)) {
+			DB_ASSERT(env,
+			    MUTEX_IS_BUSY(env, sh_locker->mtx_locker));
+			sh_locker->id = DB_LOCK_INVALIDID;
+		} else {
+			if (sh_locker->mtx_locker != MUTEX_INVALID && (ret =
+			    __mutex_free(env, &sh_locker->mtx_locker)) != 0)
+				return (ret);
+			F_SET(sh_locker, DB_LOCKER_FREE);
+			SH_TAILQ_INSERT_HEAD(&region->free_lockers, sh_locker,
+					links, __db_locker);
+		}
 	}
 
 	return (0);
@@ -518,7 +595,7 @@ __lock_freelocker_int(lt, region, sh_locker, reallyfree)
  * __lock_freelocker
  *	Remove a locker its family from the hash table.
  *
- * This must be called without the locker bucket locked.
+ * This must be called without the lockers mutex locked.
  *
  * PUBLIC: int __lock_freelocker  __P((DB_LOCKTAB *, DB_LOCKER *));
  */
@@ -570,3 +647,42 @@ __lock_familyremove(lt, sh_locker)
 
 	return (ret);
 }
+
+/*
+ * __lock_local_locker_invalidate --
+ *	Walk the thread info table; for the first thread whose cached locker
+ *	uses this mutex, log it and set mtx_locker to MUTEX_INVALID. Returns 0.
+ *
+ * PUBLIC: int __lock_local_locker_invalidate  __P((ENV *, db_mutex_t));
+ */
+int
+__lock_local_locker_invalidate(env, mutex)
+	ENV *env;
+	db_mutex_t mutex;
+{
+	DB_HASHTAB *htab;
+	DB_LOCKER *locker;
+	DB_THREAD_INFO *ip;
+	u_int32_t i;
+	char buf[DB_THREADID_STRLEN];
+
+	htab = env->thr_hashtab;
+	for (i = 0; i < env->thr_nbucket; i++) {
+		SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+			if (ip->dbth_local_locker == INVALID_ROFF)
+				continue;	/* No cached locker. */
+			locker = (DB_LOCKER *)R_ADDR(&env->lk_handle->reginfo,
+			    ip->dbth_local_locker);
+			if (locker->mtx_locker == mutex) {
+				__db_msg(env,
+DB_STR_A("2061", "Removing cached locker mutex %lu reference by %s", "%lu %s"),
+				    (u_long)mutex,
+				    env->dbenv->thread_id_string(env->dbenv,
+				    locker->pid, locker->tid, buf));
+				locker->mtx_locker = MUTEX_INVALID;
+				return (0);
+			}
+		}
+	}
+	return (0);
+}
diff --git a/src/lock/lock_list.c b/src/lock/lock_list.c
index 1e3d2a55..5d55e4a0 100644
--- a/src/lock/lock_list.c
+++ b/src/lock/lock_list.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/lock/lock_method.c b/src/lock/lock_method.c
index 0cc2e19d..0e6c0428 100644
--- a/src/lock/lock_method.c
+++ b/src/lock/lock_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/lock/lock_region.c b/src/lock/lock_region.c
index 1aae1815..ecc7ba47 100644
--- a/src/lock/lock_region.c
+++ b/src/lock/lock_region.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -120,13 +120,15 @@ __lock_open(env)
 	}
 
 	/*
-	 * A process joining the region may have reset the lock and transaction
-	 * timeouts.
+	 * Lock and transaction timeouts will be ignored when joining the
+	 * environment, so print a warning if either was set.
 	 */
-	if (dbenv->lk_timeout != 0)
-		region->lk_timeout = dbenv->lk_timeout;
-	if (dbenv->tx_timeout != 0)
-		region->tx_timeout = dbenv->tx_timeout;
+	if (dbenv->lk_timeout != 0 && region->lk_timeout != dbenv->lk_timeout)
+		__db_msg(env, DB_STR("2058",
+"Warning: Ignoring DB_SET_LOCK_TIMEOUT when joining the environment."));
+	if (dbenv->tx_timeout != 0 && region->tx_timeout != dbenv->tx_timeout)
+		__db_msg(env, DB_STR("2059",
+"Warning: Ignoring DB_SET_TXN_TIMEOUT when joining the environment."));
 
 	LOCK_REGION_UNLOCK(env);
 	region_locked = 0;
@@ -396,13 +398,30 @@ __lock_env_refresh(env)
 		    R_ADDR(reginfo, lr->locker_mem_off));
 	}
 
-	/* Detach from the region. */
-	ret = __env_region_detach(env, reginfo, 0);
+	ret = __lock_region_detach(env, lt);
 
-	/* Discard DB_LOCKTAB. */
-	__os_free(env, lt);
-	env->lk_handle = NULL;
+	return (ret);
+}
+
+/*
+ * __lock_region_detach --
+ *	Detach from the lock region and free lt; does nothing if lt is NULL.
+ * PUBLIC: int __lock_region_detach __P((ENV *, DB_LOCKTAB *));
+ */
+int
+__lock_region_detach(env, lt)
+	ENV *env;
+	DB_LOCKTAB *lt;
+{
+	int ret;
 
+	ret = 0;
+	if (lt != NULL) {
+		ret = __env_region_detach(env, &lt->reginfo, 0);
+		/* Discard DB_LOCKTAB, even if the detach failed. */
+		__os_free(env, lt);
+		env->lk_handle = NULL;
+	}
 	return (ret);
 }
 
diff --git a/src/lock/lock_stat.c b/src/lock/lock_stat.c
index 11b934aa..1ce0796a 100644
--- a/src/lock/lock_stat.c
+++ b/src/lock/lock_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -15,8 +15,6 @@
 #include "dbinc/db_am.h"
 
 #ifdef HAVE_STATISTICS
-static int  __lock_dump_locker
-		__P((ENV *, DB_MSGBUF *, DB_LOCKTAB *, DB_LOCKER *));
 static int  __lock_dump_object __P((DB_LOCKTAB *, DB_MSGBUF *, DB_LOCKOBJ *));
 static int  __lock_print_all __P((ENV *, u_int32_t));
 static int  __lock_print_stats __P((ENV *, u_int32_t));
@@ -363,6 +361,11 @@ __lock_print_stats(env, flags)
 	__db_dl(env, "Maximum number of lockers at any one time",
 	    (u_long)sp->st_maxnlockers);
 	__db_dl(env,
+	    "Number of hits in the thread locker cache",
+	    (u_long)sp->st_nlockers_hit);
+	__db_dl(env,
+	    "Total number of lockers reused", (u_long)sp->st_nlockers_reused);
+	__db_dl(env,
 	    "Number of current lock objects", (u_long)sp->st_nobjects);
 	__db_dl(env, "Maximum number of lock objects at any one time",
 	    (u_long)sp->st_maxnobjects);
@@ -463,9 +466,17 @@ __lock_print_all(env, flags)
 		if (timespecisset(&lrp->next_timeout)) {
 #ifdef HAVE_STRFTIME
 			time_t t = (time_t)lrp->next_timeout.tv_sec;
+			struct tm *tm_p;
 			char tbuf[64];
+#ifdef HAVE_LOCALTIME_R
+			struct tm tm;
+
+			tm_p = localtime_r(&t, &tm);
+#else
+			tm_p = localtime(&t);
+#endif
 			if (strftime(tbuf, sizeof(tbuf),
-			    "%m-%d-%H:%M:%S", localtime(&t)) != 0)
+			    "%m-%d-%H:%M:%S", tm_p) != 0)
 				__db_msg(env, "next_timeout: %s.%09lu",
 				     tbuf, (u_long)lrp->next_timeout.tv_nsec);
 			else
@@ -519,80 +530,6 @@ __lock_print_all(env, flags)
 }
 
 static int
-__lock_dump_locker(env, mbp, lt, lip)
-	ENV *env;
-	DB_MSGBUF *mbp;
-	DB_LOCKTAB *lt;
-	DB_LOCKER *lip;
-{
-	DB_LOCKREGION *lrp;
-	struct __db_lock *lp;
-	char buf[DB_THREADID_STRLEN];
-	u_int32_t ndx;
-
-	lrp = lt->reginfo.primary;
-
-	__db_msgadd(env,
-	    mbp, "%8lx dd=%2ld locks held %-4d write locks %-4d pid/thread %s",
-	    (u_long)lip->id, (long)lip->dd_id, lip->nlocks, lip->nwrites,
-	    env->dbenv->thread_id_string(env->dbenv, lip->pid, lip->tid, buf));
-	__db_msgadd(env, mbp,
-	    " flags %-4x priority %-10u", lip->flags, lip->priority);
-
-	if (timespecisset(&lip->tx_expire)) {
-#ifdef HAVE_STRFTIME
-		time_t t = (time_t)lip->tx_expire.tv_sec;
-		char tbuf[64];
-		if (strftime(tbuf, sizeof(tbuf),
-		    "%m-%d-%H:%M:%S", localtime(&t)) != 0)
-			__db_msgadd(env, mbp, "expires %s.%09lu",
-			    tbuf, (u_long)lip->tx_expire.tv_nsec);
-		else
-#endif
-			__db_msgadd(env, mbp, "expires %lu.%09lu",
-			    (u_long)lip->tx_expire.tv_sec,
-			    (u_long)lip->tx_expire.tv_nsec);
-	}
-	if (F_ISSET(lip, DB_LOCKER_TIMEOUT))
-		__db_msgadd(
-		    env, mbp, " lk timeout %lu", (u_long)lip->lk_timeout);
-	if (timespecisset(&lip->lk_expire)) {
-#ifdef HAVE_STRFTIME
-		time_t t = (time_t)lip->lk_expire.tv_sec;
-		char tbuf[64];
-		if (strftime(tbuf,
-		    sizeof(tbuf), "%m-%d-%H:%M:%S", localtime(&t)) != 0)
-			__db_msgadd(env, mbp, " lk expires %s.%09lu",
-			    tbuf, (u_long)lip->lk_expire.tv_nsec);
-		else
-#endif
-			__db_msgadd(env, mbp, " lk expires %lu.%09lu",
-			    (u_long)lip->lk_expire.tv_sec,
-			    (u_long)lip->lk_expire.tv_nsec);
-	}
-	DB_MSGBUF_FLUSH(env, mbp);
-
-	/*
-	 * We need some care here since the list may change while we
-	 * look.
-	 */
-retry:	SH_LIST_FOREACH(lp, &lip->heldby, locker_links, __db_lock) {
-		if (!SH_LIST_EMPTY(&lip->heldby) && lp != NULL) {
-			ndx = lp->indx;
-			OBJECT_LOCK_NDX(lt, lrp, ndx);
-			if (lp->indx == ndx)
-				__lock_printlock(lt, mbp, lp, 1);
-			else {
-				OBJECT_UNLOCK(lt, lrp, ndx);
-				goto retry;
-			}
-			OBJECT_UNLOCK(lt, lrp, ndx);
-		}
-	}
-	return (0);
-}
-
-static int
 __lock_dump_object(lt, mbp, op)
 	DB_LOCKTAB *lt;
 	DB_MSGBUF *mbp;
@@ -619,6 +556,31 @@ __lock_print_header(env)
 	    "Count", "Status", "----------------- Object ---------------");
 }
 
+#else /* !HAVE_STATISTICS */
+/* Statistics not compiled in: stubs defer to __db_stat_not_built(). */
+int
+__lock_stat_pp(dbenv, statp, flags)
+	DB_ENV *dbenv;
+	DB_LOCK_STAT **statp;
+	u_int32_t flags;
+{
+	COMPQUIET(statp, NULL);
+	COMPQUIET(flags, 0);
+
+	return (__db_stat_not_built(dbenv->env));
+}
+
+int
+__lock_stat_print_pp(dbenv, flags)
+	DB_ENV *dbenv;
+	u_int32_t flags;
+{
+	COMPQUIET(flags, 0);
+
+	return (__db_stat_not_built(dbenv->env));
+}
+#endif /* HAVE_STATISTICS */
+
 /*
  * __lock_printlock --
  *
@@ -744,27 +706,99 @@ __lock_printlock(lt, mbp, lp, ispgno)
 	DB_MSGBUF_FLUSH(env, mbp);
 }
 
-#else /* !HAVE_STATISTICS */
-
+/*
+ * __lock_dump_locker --
+ *	Display the identity and statistics of a locker, followed by each lock
+ *	it holds. This is used during diagnostic error paths as well as when
+ *	printing statistics, so timestamps are formatted with the reentrant
+ *	localtime_r() when it is available. Always returns 0.
+ *
+ * PUBLIC: int  __lock_dump_locker
+ * PUBLIC:     __P((ENV *, DB_MSGBUF *, DB_LOCKTAB *, DB_LOCKER *));
+ */
 int
-__lock_stat_pp(dbenv, statp, flags)
-	DB_ENV *dbenv;
-	DB_LOCK_STAT **statp;
-	u_int32_t flags;
+__lock_dump_locker(env, mbp, lt, lip)
+	ENV *env;
+	DB_MSGBUF *mbp;
+	DB_LOCKTAB *lt;
+	DB_LOCKER *lip;
 {
-	COMPQUIET(statp, NULL);
-	COMPQUIET(flags, 0);
+	DB_LOCKREGION *lrp;
+	struct __db_lock *lp;
+	char buf[DB_THREADID_STRLEN];
+	u_int32_t ndx;
 
-	return (__db_stat_not_built(dbenv->env));
-}
+	lrp = lt->reginfo.primary;
 
-int
-__lock_stat_print_pp(dbenv, flags)
-	DB_ENV *dbenv;
-	u_int32_t flags;
-{
-	COMPQUIET(flags, 0);
+	__db_msgadd(env,
+	    mbp, "%8lx dd=%2ld locks held %-4d write locks %-4d pid/thread %s",
+	    (u_long)lip->id, (long)lip->dd_id, lip->nlocks, lip->nwrites,
+	    env->dbenv->thread_id_string(env->dbenv, lip->pid, lip->tid, buf));
+	__db_msgadd(env, mbp,
+	    " flags %-4x priority %-10u", lip->flags, lip->priority);
 
-	return (__db_stat_not_built(dbenv->env));
-}
+	if (timespecisset(&lip->tx_expire)) {
+#ifdef HAVE_STRFTIME
+		time_t t = (time_t)lip->tx_expire.tv_sec;
+		struct tm *tm_p;
+		char tbuf[64];
+#ifdef HAVE_LOCALTIME_R
+		struct tm tm;
+
+		tm_p = localtime_r(&t, &tm);
+#else
+		tm_p = localtime(&t);
+#endif
+		if (tm_p != NULL && strftime(tbuf, sizeof(tbuf),
+		    "%m-%d-%H:%M:%S", tm_p) != 0)
+			__db_msgadd(env, mbp, "expires %s.%09lu",
+			    tbuf, (u_long)lip->tx_expire.tv_nsec);
+		else
 #endif
+			__db_msgadd(env, mbp, "expires %lu.%09lu",
+			    (u_long)lip->tx_expire.tv_sec,
+			    (u_long)lip->tx_expire.tv_nsec);
+	}
+	if (F_ISSET(lip, DB_LOCKER_TIMEOUT))
+		__db_msgadd(
+		    env, mbp, " lk timeout %lu", (u_long)lip->lk_timeout);
+	if (timespecisset(&lip->lk_expire)) {
+#ifdef HAVE_STRFTIME
+		time_t t = (time_t)lip->lk_expire.tv_sec;
+		struct tm *tm_p;
+		char tbuf[64];
+#ifdef HAVE_LOCALTIME_R
+		struct tm tm;
+
+		tm_p = localtime_r(&t, &tm);
+#else
+		tm_p = localtime(&t);
+#endif
+		if (tm_p != NULL && strftime(tbuf,
+		    sizeof(tbuf), "%m-%d-%H:%M:%S", tm_p) != 0)
+			__db_msgadd(env, mbp, " lk expires %s.%09lu",
+			    tbuf, (u_long)lip->lk_expire.tv_nsec);
+		else
+#endif
+			__db_msgadd(env, mbp, " lk expires %lu.%09lu",
+			    (u_long)lip->lk_expire.tv_sec,
+			    (u_long)lip->lk_expire.tv_nsec);
+	}
+	DB_MSGBUF_FLUSH(env, mbp);
+
+	/* We need some care here since the list may change while we look. */
+retry:	SH_LIST_FOREACH(lp, &lip->heldby, locker_links, __db_lock) {
+		if (!SH_LIST_EMPTY(&lip->heldby) && lp != NULL) {
+			ndx = lp->indx;
+			OBJECT_LOCK_NDX(lt, lrp, ndx);
+			if (lp->indx == ndx)
+				__lock_printlock(lt, mbp, lp, 1);
+			else {
+				OBJECT_UNLOCK(lt, lrp, ndx);
+				goto retry;
+			}
+			OBJECT_UNLOCK(lt, lrp, ndx);
+		}
+	}
+	return (0);
+}
diff --git a/src/lock/lock_stub.c b/src/lock/lock_stub.c
index 3875af55..a916c6df 100644
--- a/src/lock/lock_stub.c
+++ b/src/lock/lock_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -359,6 +359,7 @@ size_t
 __lock_region_max(env)
 	ENV *env;
 {
+	COMPQUIET(env, NULL);
 	return (0);
 }
 
@@ -367,6 +368,7 @@ __lock_region_size(env, other_alloc)
 	ENV *env;
 	size_t other_alloc;
 {
+	COMPQUIET(env, NULL);
 	COMPQUIET(other_alloc, 0);
 	return (0);
 }
@@ -584,6 +586,7 @@ __lock_list_print(env, mbp, list)
 	DBT *list;
 {
 	COMPQUIET(env, NULL);
+	COMPQUIET(mbp, NULL);
 	COMPQUIET(list, NULL);
 }
 
@@ -625,7 +628,7 @@ __lock_change(env, old_lock, new_lock)
 	ENV *env;
 	DB_LOCK *old_lock, *new_lock;
 {
-	COMPQUIET(env, NULL);
 	COMPQUIET(old_lock, NULL);
 	COMPQUIET(new_lock, NULL);
+	return (__db_nolocking(env));
 }
diff --git a/src/lock/lock_timer.c b/src/lock/lock_timer.c
index 943047f0..9744438a 100644
--- a/src/lock/lock_timer.c
+++ b/src/lock/lock_timer.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/lock/lock_util.c b/src/lock/lock_util.c
index f7029cd7..07fdce72 100644
--- a/src/lock/lock_util.c
+++ b/src/lock/lock_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/log/log.c b/src/log/log.c
index 5808145f..9bef8d69 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -32,6 +32,7 @@ __log_open(env)
 	DB_ENV *dbenv;
 	DB_LOG *dblp;
 	LOG *lp;
+	u_int32_t log_flags;
 	u_int8_t *bulk;
 	int region_locked, ret;
 
@@ -130,47 +131,59 @@ __log_open(env)
 		}
 	} else {
 		/*
-		 * A process joining the region may have reset the log file
-		 * size, too.  If so, it only affects the next log file we
-		 * create.  We need to check that the size is reasonable given
-		 * the buffer size in the region.
+		 * The log file size and DB_LOG_AUTO_REMOVE will be ignored
+		 * when joining the environment, so print a warning if either
+		 * was set.
 		 */
-		LOG_SYSTEM_LOCK(env);
-		region_locked = 1;
-
-		 if (dbenv->lg_size != 0) {
-			if ((ret =
-			    __log_check_sizes(env, dbenv->lg_size, 0)) != 0)
-				goto err;
-
-			lp->log_nsize = dbenv->lg_size;
-		 }
-
-		LOG_SYSTEM_UNLOCK(env);
-		region_locked = 0;
-
-		if (dbenv->lg_flags != 0 && (ret =
-		    __log_set_config_int(dbenv, dbenv->lg_flags, 1, 0)) != 0)
+		 if (dbenv->lg_size != 0 && lp->log_nsize != dbenv->lg_size)
+			__db_msg(env, DB_STR("2585",
+"Warning: Ignoring maximum log file size when joining the environment"));
+
+		log_flags = dbenv->lg_flags & ~DB_LOG_AUTO_REMOVE;
+		if ((dbenv->lg_flags & DB_LOG_AUTO_REMOVE) &&
+		    lp->db_log_autoremove == 0)
+			__db_msg(env, DB_STR("2586",
+"Warning: Ignoring DB_LOG_AUTO_REMOVE when joining the environment."));
+		if (log_flags != 0 && (ret =
+		    __log_set_config_int(dbenv, log_flags, 1, 0)) != 0)
 			return (ret);
 	}
 	dblp->reginfo.mtx_alloc = lp->mtx_region;
 
 	return (0);
 
-err:	if (dblp->reginfo.addr != NULL) {
-		if (region_locked)
-			LOG_SYSTEM_UNLOCK(env);
-		(void)__env_region_detach(env, &dblp->reginfo, 0);
-	}
-	env->lg_handle = NULL;
-
+err:	if (region_locked)
+		LOG_SYSTEM_UNLOCK(env);
 	(void)__mutex_free(env, &dblp->mtx_dbreg);
-	__os_free(env, dblp);
+	(void)__log_region_detach(env, dblp);
 
 	return (ret);
 }
 
 /*
+ * __log_region_detach --
+ *	Detach from the log region, if attached, and free dblp (may be NULL).
+ * PUBLIC: int __log_region_detach __P((ENV *, DB_LOG *));
+ */
+int
+__log_region_detach(env, dblp)
+	ENV *env;
+	DB_LOG *dblp;
+{
+	int ret;
+
+	ret = 0;
+	if (dblp != NULL) {
+		if (dblp->reginfo.addr != NULL)
+			ret = __env_region_detach(env, &dblp->reginfo, 0);
+		/* Discard DB_LOG, even if the detach failed. */
+		__os_free(env, dblp);
+		env->lg_handle = NULL;
+	}
+	return (ret);
+}
+
+/*
  * __log_init --
  *	Initialize a log region in shared memory.
  */
@@ -638,7 +651,6 @@ __log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp)
 	recsize = sizeof(LOGP);
 	if (CRYPTO_ON(env)) {
 		hdrsize = HDR_CRYPTO_SZ;
-		recsize = sizeof(LOGP);
 		recsize += db_cipher->adj_size(recsize);
 		is_hmac = 1;
 	}
@@ -700,7 +712,7 @@ __log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp)
 		 * we can only detect that by having an unreasonable
 		 * data length for our persistent data.
 		 */
-		if ((hdr->len - hdrsize) != sizeof(LOGP)) {
+		if ((hdr->len - hdrsize) != recsize) {
 			__db_errx(env, "log record size mismatch");
 			goto err;
 		}
@@ -722,10 +734,10 @@ __log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp)
 			    hdr->len - hdrsize, is_hmac)) != 0)
 				goto bad_checksum;
 			/*
- 			 * The checksum verifies without the header.  Make note
- 			 * of that, because it is only acceptable when the log
- 			 * version < DB_LOGCHKSUM.  Later, when we determine log
- 			 * version, we will confirm this.
+			 * The checksum verifies without the header.  Make note
+			 * of that, because it is only acceptable when the log
+			 * version < DB_LOGCHKSUM.  Later, when we determine log
+			 * version, we will confirm this.
 			 */
 			chksum_includes_hdr = 0;
 		}
@@ -800,7 +812,7 @@ __log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp)
 		/*
 		 * We might have to declare a checksum failure here, if:
 		 * - the checksum verified only by ignoring the header, and
-		 * - the log version indicates that the header should have 
+		 * - the log version indicates that the header should have
 		 * been included.
 		 */
 		if (!chksum_includes_hdr && logversion >= DB_LOGCHKSUM)
@@ -899,66 +911,69 @@ __log_env_refresh(env)
 	/*
 	 * After we close the files, check for any unlogged closes left in
 	 * the shared memory queue.  If we find any, try to log it, otherwise
-	 * return the error.  We cannot say the environment was closed
-	 * cleanly.
+	 * return the error; we cannot say the environment was closed cleanly.
+	 * This does not use the typical MUTEX_LOCK(), but MUTEX_LOCK_RET(). The
+	 * normal function would immediately return DB_RUNRECOVERY if we are
+	 * closing the env down during a panic. By using MUTEX_LOCK_RET(), we
+	 * continue with the rest of the cleanup.
 	 */
-	MUTEX_LOCK(env, lp->mtx_filelist);
-	SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname)
-		if (F_ISSET(fnp, DB_FNAME_NOTLOGGED) &&
-		    (t_ret = __dbreg_close_id_int(
-		    env, fnp, DBREG_CLOSE, 1)) != 0)
-			ret = t_ret;
-	MUTEX_UNLOCK(env, lp->mtx_filelist);
-
+	if (MUTEX_LOCK_RET(env, lp->mtx_filelist) == 0) {
+		SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname)
+			if (F_ISSET(fnp, DB_FNAME_NOTLOGGED) &&
+			    (t_ret = __dbreg_close_id_int(
+			    env, fnp, DBREG_CLOSE, 1)) != 0)
+				ret = t_ret;
+		MUTEX_UNLOCK(env, lp->mtx_filelist);
+	}
 	/*
-	 * If a private region, return the memory to the heap.  Not needed for
-	 * filesystem-backed or system shared memory regions, that memory isn't
-	 * owned by any particular process.
+	 * If a private region, return the memory to the heap.  Not
+	 * needed for filesystem-backed or system shared memory regions,
+	 * that memory isn't owned by any particular process.
 	 */
 	if (F_ISSET(env, ENV_PRIVATE)) {
-		reginfo->mtx_alloc = MUTEX_INVALID;
-		/* Discard the flush mutex. */
-		if ((t_ret =
-		    __mutex_free(env, &lp->mtx_flush)) != 0 && ret == 0)
-			ret = t_ret;
-
-		/* Discard the buffer. */
-		__env_alloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off));
-
-		/* Discard stack of free file IDs. */
-		if (lp->free_fid_stack != INVALID_ROFF)
-			__env_alloc_free(reginfo,
-			    R_ADDR(reginfo, lp->free_fid_stack));
-
-		/* Discard the list of in-memory log file markers. */
-		while ((filestart = SH_TAILQ_FIRST(&lp->logfiles,
-		    __db_filestart)) != NULL) {
-			SH_TAILQ_REMOVE(&lp->logfiles, filestart, links,
-			    __db_filestart);
-			__env_alloc_free(reginfo, filestart);
-		}
-
-		while ((filestart = SH_TAILQ_FIRST(&lp->free_logfiles,
-		    __db_filestart)) != NULL) {
-			SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, links,
-			    __db_filestart);
-			__env_alloc_free(reginfo, filestart);
-		}
-
-		/* Discard commit queue elements. */
-		while ((commit = SH_TAILQ_FIRST(&lp->free_commits,
-		    __db_commit)) != NULL) {
-			SH_TAILQ_REMOVE(&lp->free_commits, commit, links,
-			    __db_commit);
-			__env_alloc_free(reginfo, commit);
-		}
-
-		/* Discard replication bulk buffer. */
-		if (lp->bulk_buf != INVALID_ROFF) {
-			__env_alloc_free(reginfo,
-			    R_ADDR(reginfo, lp->bulk_buf));
-			lp->bulk_buf = INVALID_ROFF;
-		}
+	    reginfo->mtx_alloc = MUTEX_INVALID;
+	    /* Discard the flush mutex. */
+	    if ((t_ret =
+		__mutex_free(env, &lp->mtx_flush)) != 0 && ret == 0)
+		    ret = t_ret;
+
+	    /* Discard the log buffer. */
+	    __env_alloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off));
+
+	    /* Discard stack of free file IDs. */
+	    if (lp->free_fid_stack != INVALID_ROFF)
+		    __env_alloc_free(reginfo,
+			R_ADDR(reginfo, lp->free_fid_stack));
+
+	    /* Discard the list of in-memory log file markers. */
+	    while ((filestart = SH_TAILQ_FIRST(&lp->logfiles,
+		__db_filestart)) != NULL) {
+		    SH_TAILQ_REMOVE(&lp->logfiles, filestart, links,
+			__db_filestart);
+		    __env_alloc_free(reginfo, filestart);
+	    }
+
+	    while ((filestart = SH_TAILQ_FIRST(&lp->free_logfiles,
+		__db_filestart)) != NULL) {
+		    SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, links,
+			__db_filestart);
+		    __env_alloc_free(reginfo, filestart);
+	    }
+
+	    /* Discard commit queue elements. */
+	    while ((commit = SH_TAILQ_FIRST(&lp->free_commits,
+		__db_commit)) != NULL) {
+		    SH_TAILQ_REMOVE(&lp->free_commits, commit, links,
+			__db_commit);
+		    __env_alloc_free(reginfo, commit);
+	    }
+
+	    /* Discard replication bulk buffer. */
+	    if (lp->bulk_buf != INVALID_ROFF) {
+		    __env_alloc_free(reginfo,
+			R_ADDR(reginfo, lp->bulk_buf));
+		    lp->bulk_buf = INVALID_ROFF;
+	    }
 	}
 
 	/* Discard the per-thread DBREG mutex. */
@@ -1394,7 +1409,7 @@ __log_inmem_lsnoff(dblp, lsnp, offsetp)
 			return (0);
 		}
 
-	return (DB_NOTFOUND);
+	return (USR_ERR(dblp->env, DB_NOTFOUND));
 }
 
 /*
diff --git a/src/log/log_archive.c b/src/log/log_archive.c
index 280a2071..fb98e10b 100644
--- a/src/log/log_archive.c
+++ b/src/log/log_archive.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -304,7 +304,7 @@ __log_get_stable_lsn(env, stable_lsn, group_wide)
 		 * so that the caller knows it may be done.
 		 */
 		if (IS_ZERO_LSN(*stable_lsn)) {
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 			goto err;
 		}
 	} else if ((ret = __txn_getckp(env, stable_lsn)) != 0)
diff --git a/src/log/log_compare.c b/src/log/log_compare.c
index 97b59338..9bd28854 100644
--- a/src/log/log_compare.c
+++ b/src/log/log_compare.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/log/log_debug.c b/src/log/log_debug.c
index 32fb2542..d8f10798 100644
--- a/src/log/log_debug.c
+++ b/src/log/log_debug.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/log/log_get.c b/src/log/log_get.c
index db30c969..332dab8e 100644
--- a/src/log/log_get.c
+++ b/src/log/log_get.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -472,7 +472,7 @@ nextrec:
 			/* If at start-of-file, move to the previous file. */
 			if (nlsn.offset == 0) {
 				if (nlsn.file == 1) {
-					ret = DB_NOTFOUND;
+					ret = USR_ERR(env, DB_NOTFOUND);
 					goto err;
 				}
 				if ((!lp->db_log_inmemory &&
@@ -480,7 +480,7 @@ nextrec:
 				    0, &status, NULL) != 0 ||
 				    (status != DB_LV_NORMAL &&
 				    status != DB_LV_OLD_READABLE)))) {
-					ret = DB_NOTFOUND;
+					ret = USR_ERR(env, DB_NOTFOUND);
 					goto err;
 				}
 
@@ -607,7 +607,7 @@ nohdr:		switch (flags) {
 			if (eof && logc->bp_lsn.file != nlsn.file)
 				__db_errx(env, DB_STR_A("2583",
 	     "Log file %d not found, check log directory configuration", "%d"),
-	     			     nlsn.file);
+				     nlsn.file);
 			else
 				__db_errx(env, DB_STR("2576",
 		"Encountered zero length records while traversing backwards"));
@@ -624,7 +624,7 @@ nohdr:		switch (flags) {
 			/* FALLTHROUGH */
 		case DB_SET:
 		default:
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 			goto err;
 		}
 	}
@@ -830,7 +830,7 @@ __logc_incursor(logc, lsn, hdr, pp)
 	if (LOG_SWAPPED(env))
 		__log_hdrswap(hdr, CRYPTO_ON(env));
 	if (__logc_hdrchk(logc, lsn, hdr, &eof))
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 	if (eof || logc->bp_lsn.offset + logc->bp_rlen < lsn->offset + hdr->len)
 		return (0);
 
@@ -914,7 +914,7 @@ __logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
 	if (IS_ZERO_LSN(lp->lsn))
 		return (0);
 	if (LOG_COMPARE(lsn, &lp->lsn) >= 0)
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 	else if (lp->db_log_inmemory) {
 		if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0)
 			return (ret);
@@ -949,14 +949,14 @@ __logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
 		if (LOG_SWAPPED(env))
 			__log_hdrswap(hdr, CRYPTO_ON(env));
 		if (__logc_hdrchk(logc, lsn, hdr, &eof) != 0)
-			return (DB_NOTFOUND);
+			return (USR_ERR(env, DB_NOTFOUND));
 		if (eof)
 			return (0);
 		if (lp->db_log_inmemory) {
 			if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len)
-				return (DB_NOTFOUND);
+				return (USR_ERR(env, DB_NOTFOUND));
 		} else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
-			return (DB_NOTFOUND);
+			return (USR_ERR(env, DB_NOTFOUND));
 		if (logc->bp_size <= hdr->len) {
 			len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
 			if ((ret =
@@ -1535,6 +1535,10 @@ __log_read_record(env, dbpp, td, recbuf, spec, size, argpp)
 			LOGCOPY_32(env, ap + sp->offset, bp);
 			bp += sizeof(uinttmp);
 			break;
+		case LOGREC_LONGARG:
+			LOGCOPY_64(env, ap + sp->offset, bp);
+			bp += sizeof(u_int64_t);
+			break;
 		case LOGREC_OP:
 			LOGCOPY_32(env, &op, bp);
 			*(u_int32_t *)(ap + sp->offset) = op;
diff --git a/src/log/log_method.c b/src/log/log_method.c
index d5aec116..09fbe863 100644
--- a/src/log/log_method.c
+++ b/src/log/log_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -342,6 +342,10 @@ __log_get_flags(dbenv, flagsp)
 		LF_SET(DB_LOG_IN_MEMORY);
 	else
 		LF_CLR(DB_LOG_IN_MEMORY);
+	if (lp->nosync)
+		LF_SET(DB_LOG_NOSYNC);
+	else
+		LF_CLR(DB_LOG_NOSYNC);
 	*flagsp = flags;
 }
 
@@ -369,6 +373,8 @@ __log_set_flags(env, flags, on)
 		lp->db_log_autoremove = on ? 1 : 0;
 	if (LF_ISSET(DB_LOG_IN_MEMORY))
 		lp->db_log_inmemory = on ? 1 : 0;
+	if (LF_ISSET(DB_LOG_NOSYNC))
+		lp->nosync = on ? 1 : 0;
 }
 
 /*
@@ -377,13 +383,15 @@ __log_set_flags(env, flags, on)
  */
 #undef	OK_FLAGS
 #define	OK_FLAGS							\
-    (DB_LOG_AUTO_REMOVE | DB_LOG_DIRECT |				\
-    DB_LOG_DSYNC | DB_LOG_IN_MEMORY | DB_LOG_ZERO)
+    (DB_LOG_AUTO_REMOVE | DB_LOG_BLOB | DB_LOG_DIRECT |			\
+     DB_LOG_DSYNC | DB_LOG_IN_MEMORY | DB_LOG_NOSYNC | DB_LOG_ZERO)
 static const FLAG_MAP LogMap[] = {
 	{ DB_LOG_AUTO_REMOVE,	DBLOG_AUTOREMOVE},
+	{ DB_LOG_BLOB,		DBLOG_BLOB},
 	{ DB_LOG_DIRECT,	DBLOG_DIRECT},
 	{ DB_LOG_DSYNC,		DBLOG_DSYNC},
 	{ DB_LOG_IN_MEMORY,	DBLOG_INMEMORY},
+	{ DB_LOG_NOSYNC,	DBLOG_NOSYNC},
 	{ DB_LOG_ZERO,		DBLOG_ZERO}
 };
 /*
@@ -406,10 +414,14 @@ __log_get_config(dbenv, which, onp)
 	if (FLD_ISSET(which, ~OK_FLAGS))
 		return (__db_ferr(env, "DB_ENV->log_get_config", 0));
 	dblp = env->lg_handle;
-	ENV_REQUIRES_CONFIG(env, dblp, "DB_ENV->log_get_config", DB_INIT_LOG);
+	ENV_NOT_CONFIGURED(env, dblp, "DB_ENV->log_get_config", DB_INIT_LOG);
+
+	if (LOGGING_ON(env)) {
+		__env_fetch_flags(LogMap, sizeof(LogMap), &dblp->flags, &flags);
+		__log_get_flags(dbenv, &flags);
+	} else
+		flags = dbenv->lg_flags;
 
-	__env_fetch_flags(LogMap, sizeof(LogMap), &dblp->flags, &flags);
-	__log_get_flags(dbenv, &flags);
 	if (LF_ISSET(which))
 		*onp = 1;
 	else
@@ -459,6 +471,17 @@ __log_set_config_int(dbenv, flags, on, in_open)
 "DB_ENV->log_set_config: direct I/O either not configured or not supported");
 		return (EINVAL);
 	}
+	if (REP_ON(env) && LF_ISSET(DB_LOG_BLOB) && !on) {
+		__db_errx(env,
+"DB_ENV->log_set_config: DB_LOG_BLOB must be enabled with replication.");
+		return (EINVAL);
+	}
+	if (FLD_ISSET(flags, DB_LOG_IN_MEMORY) && on > 0 &&
+	    PREFMAS_IS_SET(env)) {
+		__db_errx(env, DB_STR("2587", "DB_LOG_IN_MEMORY is not "
+		    "supported in Replication Manager preferred master mode"));
+		return (EINVAL);
+	}
 
 	if (LOGGING_ON(env)) {
 		if (!in_open && LF_ISSET(DB_LOG_IN_MEMORY) &&
diff --git a/src/log/log_print.c b/src/log/log_print.c
index d2cda519..e5c920b6 100644
--- a/src/log/log_print.c
+++ b/src/log/log_print.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -37,6 +37,7 @@ __log_print_record(env, recbuf, lsnp, name, spec, info)
 	LOG *lp;
 	PAGE *hdrstart, *hdrtmp;
 	int32_t inttmp;
+	u_int64_t ulltmp;
 	u_int32_t hdrsize, op, uinttmp;
 	u_int32_t type, txnid;
 	u_int8_t *bp, *datatmp;
@@ -150,6 +151,14 @@ __log_print_record(env, recbuf, lsnp, name, spec, info)
 			__db_msgadd(env, &msgbuf,  "\n");
 			bp += sizeof(uinttmp);
 			break;
+		case LOGREC_LONGARG:
+			LOGCOPY_64(env, &ulltmp, bp);
+			__db_msgadd(env, &msgbuf,  "\t%s: ", sp->name);
+			__db_msgadd(env,
+			    &msgbuf,  "%llu", (unsigned long long)ulltmp);
+			__db_msgadd(env, &msgbuf,  "\n");
+			bp += sizeof(ulltmp);
+			break;
 		case LOGREC_TIME:
 			/* time_t is long but we only store 32 bits. */
 			LOGCOPY_32(env, &uinttmp, bp);
diff --git a/src/log/log_put.c b/src/log/log_put.c
index 8f7e23d8..4d6c3d2f 100644
--- a/src/log/log_put.c
+++ b/src/log/log_put.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -280,8 +280,7 @@ __log_put(env, lsnp, udbt, flags)
 		 * If the send fails and we're a commit or checkpoint,
 		 * there's nothing we can do;  the record's in the log.
 		 * Flush it, even if we're running with TXN_NOSYNC,
-		 * on the grounds that it should be in durable
-		 * form somewhere.
+		 * on the grounds that it should be in durable form somewhere.
 		 */
 		if (ret != 0 && FLD_ISSET(ctlflags, REPCTL_PERM))
 			LF_SET(DB_FLUSH);
@@ -473,12 +472,12 @@ __log_put_next(env, lsn, dbt, hdr, old_lsnp)
 	 */
 	if (adv_file || lp->lsn.offset == 0 ||
 	    lp->lsn.offset + hdr->size + dbt->size > lp->log_size) {
-		if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_size) {
+		if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_nsize) {
 			__db_errx(env, DB_STR_A("2513",
 	    "DB_ENV->log_put: record larger than maximum file size (%lu > %lu)",
 			    "%lu %lu"),
 			    (u_long)hdr->size + sizeof(LOGP) + dbt->size,
-			    (u_long)lp->log_size);
+			    (u_long)lp->log_nsize);
 			return (EINVAL);
 		}
 
@@ -561,7 +560,12 @@ __log_flush_commit(env, lsnp, flags)
 		    "Write failed on MASTER commit."));
 		return (__env_panic(env, ret));
 	}
-
+	/*
+	 * If this is a panic don't attempt to abort just this transaction;
+	 * it may trip over the panic, and the whole env needs to go anyway.
+	 */
+	if (ret == DB_RUNRECOVERY)
+		return (__env_panic(env, ret));
 	/*
 	 * Else, make sure that the commit record does not get out after we
 	 * abort the transaction.  Do this by overwriting the commit record
@@ -735,7 +739,7 @@ __log_newfile(dblp, lsnp, logfile, version)
 		__log_persistswap(tpersist);
 
 	if ((ret =
-	    __log_encrypt_record(env, &t, &hdr, (u_int32_t)tsize)) != 0)
+	    __log_encrypt_record(env, &t, &hdr, (u_int32_t)sizeof(LOGP))) != 0)
 		goto err;
 
 	if ((ret = __log_putr(dblp, &lsn,
@@ -1118,12 +1122,15 @@ flush:	MUTEX_LOCK(env, lp->mtx_flush);
 		LOG_SYSTEM_UNLOCK(env);
 
 	/* Sync all writes to disk. */
-	if ((ret = __os_fsync(env, dblp->lfhp)) != 0) {
-		MUTEX_UNLOCK(env, lp->mtx_flush);
-		if (release)
-			LOG_SYSTEM_LOCK(env);
-		lp->in_flush--;
-		goto done;
+	if (!lp->nosync) {
+		if ((ret = __os_fsync(env, dblp->lfhp)) != 0) {
+			MUTEX_UNLOCK(env, lp->mtx_flush);
+			if (release)
+				LOG_SYSTEM_LOCK(env);
+			lp->in_flush--;
+			goto done;
+		}
+		STAT(++lp->stat.st_scount);
 	}
 
 	/*
@@ -1143,7 +1150,6 @@ flush:	MUTEX_LOCK(env, lp->mtx_flush);
 		LOG_SYSTEM_LOCK(env);
 
 	lp->in_flush--;
-	STAT(++lp->stat.st_scount);
 
 	/*
 	 * How many flush calls (usually commits) did this call actually sync?
@@ -1440,7 +1446,7 @@ __log_newfh(dblp, create)
 		    "DB_ENV->log_newfh: %lu", (u_long)lp->lsn.file);
 	else if (status != DB_LV_NORMAL && status != DB_LV_INCOMPLETE &&
 	    status != DB_LV_OLD_READABLE)
-		ret = DB_NOTFOUND;
+		ret = USR_ERR(env, DB_NOTFOUND);
 
 	return (ret);
 }
@@ -1621,6 +1627,37 @@ err:
 	return (ret);
 }
 
+/*
+ * __log_rep_write --
+ *	Write out the log buffer on behalf of replication clients using the
+ * DB_TXN_WRITE_NOSYNC option.  A thin PUBLIC wrapper around __log_write,
+ * similar to __log_flush_commit: returns 0, or the __log_write error.
+ *
+ * The caller should hold REP->mtx_clientdb; the log region mutex is
+ * acquired here while mtx_clientdb is held.
+ *
+ * PUBLIC: int __log_rep_write __P((ENV *));
+ */
+int
+__log_rep_write(env)
+	ENV *env;
+{
+	DB_LOG *dblp;
+	LOG *lp;
+	int ret;
+
+	ret = 0;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	LOG_SYSTEM_LOCK(env);
+	/* Skip in-memory logs and b_off == 0; reset b_off after a good write. */
+	if (!lp->db_log_inmemory && lp->b_off != 0 &&
+	    (ret = __log_write(dblp, dblp->bufp, (u_int32_t)lp->b_off)) == 0)
+		lp->b_off = 0;
+	LOG_SYSTEM_UNLOCK(env);
+	return (ret);
+}
+
 static int
 __log_encrypt_record(env, dbt, hdr, orig)
 	ENV *env;
@@ -1773,6 +1810,7 @@ __log_put_record_int(env, dbp, txnp, ret_lsnp,
 	DB_TXNLOGREC *lr;
 	LOG *lp;
 	PAGE *pghdrstart;
+	u_int64_t ulltmp;
 	u_int32_t hdrsize, op, zero, uinttmp, txn_num;
 	u_int npad;
 	u_int8_t *bp;
@@ -1819,7 +1857,7 @@ __log_put_record_int(env, dbp, txnp, ret_lsnp,
 			return (ret);
 		/*
 		 * We need to assign begin_lsn while holding region mutex.
-		 * That assignment is done inside the DbEnv->log_put call,
+		 * That assignment is done inside the __log_put call,
 		 * so pass in the appropriate memory location to be filled
 		 * in by the log_put code.
 		 */
@@ -1842,8 +1880,7 @@ __log_put_record_int(env, dbp, txnp, ret_lsnp,
 	}
 
 	if (is_durable || txnp == NULL) {
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0)
+		if ((ret = __os_malloc(env, logrec.size, &logrec.data)) != 0)
 			return (ret);
 	} else {
 		if ((ret = __os_malloc(env,
@@ -1891,10 +1928,15 @@ __log_put_record_int(env, dbp, txnp, ret_lsnp,
 			LOGCOPY_32(env, bp, &uinttmp);
 			bp += sizeof(uinttmp);
 			break;
+		case LOGREC_LONGARG:
+			ulltmp = va_arg(argp, u_int64_t);
+			LOGCOPY_64(env, bp, &ulltmp);
+			bp += sizeof(ulltmp);
+			break;
 		case LOGREC_OP:
 			op = va_arg(argp, u_int32_t);
 			LOGCOPY_32(env, bp, &op);
-			bp += sizeof(uinttmp);
+			bp += sizeof(op);
 			break;
 		case LOGREC_DBT:
 		case LOGREC_PGLIST:
diff --git a/src/log/log_stat.c b/src/log/log_stat.c
index 37b74c74..95fe0e2e 100644
--- a/src/log/log_stat.c
+++ b/src/log/log_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/log/log_verify.c b/src/log/log_verify.c
index e7f8f688..2ed2f0f2 100644
--- a/src/log/log_verify.c
+++ b/src/log/log_verify.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -38,6 +38,12 @@ __log_verify_pp(dbenv, lvconfig)
 	lsnrg = ret = timerg = 0;
 	phome = NULL;
 
+	if (lvconfig == NULL) {
+		__db_errx(dbenv->env, DB_STR("2584",
+		    "Must provide a configuration structure."));
+		ret = EINVAL;
+		goto err;
+	}
 	if (!IS_ZERO_LSN(lvconfig->start_lsn) ||
 	    !IS_ZERO_LSN(lvconfig->end_lsn))
 		lsnrg = 1;
@@ -64,7 +70,8 @@ __log_verify_pp(dbenv, lvconfig)
 	}
 
 	ENV_ENTER(dbenv->env, ip);
-	ret = __log_verify(dbenv, lvconfig, ip);
+	REPLICATION_WRAP(dbenv->env,
+	    (__log_verify(dbenv, lvconfig, ip)), 0, ret);
 	ENV_LEAVE(dbenv->env, ip);
 err:	return (ret);
 }
@@ -79,18 +86,16 @@ __log_verify(dbenv, lvconfig, ip)
 	const DB_LOG_VERIFY_CONFIG *lvconfig;
 	DB_THREAD_INFO *ip;
 {
-
-	u_int32_t logcflag, max_fileno;
+	DB_LOG_VRFY_INFO *logvrfy_hdl;
 	DB_LOGC *logc;
-	ENV *env;
-	DBT data;
 	DB_DISTAB dtab;
 	DB_LSN key, start, start2, stop, stop2, verslsn;
-	u_int32_t newversion, version;
+	DBT data;
+	ENV *env;
+	u_int32_t logcflag, max_fileno, newversion, version;
 	int cmp, fwdscroll, goprev, ret, tret;
 	time_t starttime, endtime;
 	const char *okmsg;
-	DB_LOG_VRFY_INFO *logvrfy_hdl;
 
 	okmsg = NULL;
 	fwdscroll = 1;
@@ -98,6 +103,7 @@ __log_verify(dbenv, lvconfig, ip)
 	goprev = 0;
 	env = dbenv->env;
 	logc = NULL;
+	logvrfy_hdl = NULL;
 	memset(&dtab, 0, sizeof(dtab));
 	memset(&data, 0, sizeof(data));
 	version = newversion = 0;
@@ -333,11 +339,12 @@ out:
 err:
 	if (logc != NULL)
 		(void)__logc_close(logc);
-	if ((tret = __destroy_log_vrfy_info(logvrfy_hdl)) != 0 && ret == 0)
+	if (logvrfy_hdl != NULL &&
+	    (tret = __destroy_log_vrfy_info(logvrfy_hdl)) != 0 && ret == 0)
 		ret = tret;
-	if (dtab.int_dispatch)
+	if (dtab.int_dispatch != NULL)
 		__os_free(dbenv->env, dtab.int_dispatch);
-	if (dtab.ext_dispatch)
+	if (dtab.ext_dispatch != NULL)
 		__os_free(dbenv->env, dtab.ext_dispatch);
 
 	return (ret);
diff --git a/src/log/log_verify_auto.c b/src/log/log_verify_auto.c
index 08bc5d64..de08998d 100644
--- a/src/log/log_verify_auto.c
+++ b/src/log/log_verify_auto.c
@@ -174,6 +174,9 @@ __fop_init_verify(env, dtabp)
 	    __fop_write_verify, DB___fop_write)) != 0)
 		return (ret);
 	if ((ret = __db_add_recovery_int(env, dtabp,
+	    __fop_write_file_verify, DB___fop_write_file)) != 0)
+		return (ret);
+	if ((ret = __db_add_recovery_int(env, dtabp,
 	    __fop_rename_verify, DB___fop_rename)) != 0)
 		return (ret);
 	if ((ret = __db_add_recovery_int(env, dtabp,
diff --git a/src/log/log_verify_int.c b/src/log/log_verify_int.c
index abe564c6..f69f01c0 100644
--- a/src/log/log_verify_int.c
+++ b/src/log/log_verify_int.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -593,7 +593,7 @@ __crdel_metasub_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -628,7 +628,7 @@ __crdel_inmem_create_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __crdel_inmem_create_read(env, dbtp->data, &argp)) != 0)
@@ -661,7 +661,7 @@ __crdel_inmem_rename_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __crdel_inmem_rename_read(env, dbtp->data, &argp)) != 0)
@@ -694,7 +694,7 @@ __crdel_inmem_remove_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __crdel_inmem_remove_read(env, dbtp->data, &argp)) != 0)
@@ -727,7 +727,7 @@ __db_addrem_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -762,7 +762,7 @@ __db_big_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -797,7 +797,7 @@ __db_ovref_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -832,7 +832,7 @@ __db_relink_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -864,7 +864,7 @@ __db_debug_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __db_debug_read(env, dbtp->data, &argp)) != 0)
@@ -897,7 +897,7 @@ __db_noop_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -931,7 +931,7 @@ __db_pg_alloc_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -963,7 +963,7 @@ __db_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -998,7 +998,7 @@ __db_pg_free_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1030,7 +1030,7 @@ __db_pg_free_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1065,7 +1065,7 @@ __db_cksum_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
@@ -1098,7 +1098,7 @@ __db_pg_freedata_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1130,7 +1130,7 @@ __db_pg_freedata_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1165,7 +1165,7 @@ __db_pg_init_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1200,7 +1200,7 @@ __db_pg_sort_44_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1232,7 +1232,7 @@ __db_pg_trunc_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1264,7 +1264,7 @@ __db_realloc_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1299,7 +1299,7 @@ __db_relink_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1334,7 +1334,7 @@ __db_merge_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1369,7 +1369,7 @@ __db_pgno_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1515,7 +1515,7 @@ __dbreg_register_verify(env, dbtp, lsnp, notused2, lvhp)
 	opcode = 0;
 	ret = ret2 = rmv_dblife = 0;
 	puid = NULL;
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 	fregp = NULL;
 	pflife = NULL;
@@ -1749,6 +1749,36 @@ err:
 }
 
 /*
+ * PUBLIC: int __dbreg_register_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__dbreg_register_42_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__dbreg_register_42_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+	/* argp is not set before this read; on failure skip the err free. */
+	if ((ret = __dbreg_register_42_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+
+err:
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
  * PUBLIC: int __bam_split_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -1764,7 +1794,7 @@ __bam_split_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1804,7 +1834,7 @@ __bam_split_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1836,7 +1866,7 @@ __bam_rsplit_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1873,7 +1903,7 @@ __bam_adj_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1910,7 +1940,7 @@ __bam_irep_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1947,7 +1977,7 @@ __bam_cadjust_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -1984,7 +2014,7 @@ __bam_cdel_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2021,7 +2051,7 @@ __bam_repl_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2058,7 +2088,7 @@ __bam_root_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2093,7 +2123,7 @@ __bam_curadj_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2129,7 +2159,7 @@ __bam_rcuradj_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2165,7 +2195,7 @@ __bam_relink_43_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2197,7 +2227,7 @@ __bam_merge_44_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2229,7 +2259,7 @@ __fop_create_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_create_42_read(env, dbtp->data, &argp)) != 0)
@@ -2245,6 +2275,37 @@ err:
 }
 
 /*
+ * PUBLIC: int __fop_create_60_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_create_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__fop_create_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_create_60_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+	/* Only ON_NOT_SUPPORTED runs for this record; LOG_VRFY_PROC is off. */
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+	/* Free the argument structure set up by the read above. */
+	__os_free(env, argp);
+
+	return (ret);
+}
+
+/*
  * PUBLIC: int __fop_create_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -2260,7 +2321,7 @@ __fop_create_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_create_read(env, dbtp->data, &argp)) != 0)
@@ -2278,6 +2339,38 @@ err:
 }
 
 /*
+ * PUBLIC: int __fop_remove_60_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_remove_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__fop_remove_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_remove_60_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+	/* Only ON_NOT_SUPPORTED runs for this record; LOG_VRFY_PROC is off. */
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+
+err:
+	/* Free the argument structure set up by the read above. */
+	__os_free(env, argp);
+
+	return (ret);
+}
+
+/*
  * PUBLIC: int __fop_remove_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -2293,7 +2386,7 @@ __fop_remove_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_remove_read(env, dbtp->data, &argp)) != 0)
@@ -2326,7 +2419,7 @@ __fop_write_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_write_42_read(env, dbtp->data, &argp)) != 0)
@@ -2341,6 +2434,36 @@ err:
 }
 
 /*
+ * PUBLIC: int __fop_write_60_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_write_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__fop_write_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_write_60_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+	/* Only ON_NOT_SUPPORTED runs for this record; LOG_VRFY_PROC is off. */
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+	/* Free the argument structure set up by the read above. */
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
  * PUBLIC: int __fop_write_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -2356,7 +2479,7 @@ __fop_write_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_write_read(env, dbtp->data, &argp)) != 0)
@@ -2373,6 +2496,67 @@ err:
 }
 
 /*
+ * PUBLIC: int __fop_write_file_60_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_write_file_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__fop_write_file_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_write_file_60_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+	/* Only ON_NOT_SUPPORTED runs for this record; LOG_VRFY_PROC is off. */
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __fop_write_file_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_write_file_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__fop_write_file_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_write_file_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+	/* NOTE(review): out/err below are presumably used by these macros. */
+	LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+	ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+out:
+
+err:
+	/* Free the argument structure set up by the read above. */
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
  * PUBLIC: int __fop_rename_42_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -2388,7 +2572,7 @@ __fop_rename_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_rename_42_read(env, dbtp->data, &argp)) != 0)
@@ -2404,6 +2588,37 @@ err:
 }
 
 /*
+ * PUBLIC: int __fop_rename_60_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_rename_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;		/* dbtp->data is handed to the _read routine. */
+	DB_LSN *lsnp;
+	db_recops notused2;	/* Unused; quieted by COMPQUIET below. */
+	void *lvhp;		/* Cast to DB_LOG_VRFY_INFO * below. */
+{
+	__fop_rename_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_rename_60_read(env, dbtp->data, &argp)) != 0)
+		return (ret);	/* Read failed; argp is not freed. */
+
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:	/* NOTE(review): no goto is visible; presumably a macro target. */
+
+	__os_free(env, argp);
+
+	return (ret);
+}
+
+/*
  * PUBLIC: int __fop_rename_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -2423,7 +2638,7 @@ __fop_rename_verify(env, dbtp, lsnp, notused2, lvhp)
 	VRFY_FILEREG_INFO freg, *fregp;
 
 	memset(&freg, 0, sizeof(freg));
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 	buf = NULL;
 
@@ -2470,6 +2685,38 @@ err:
 }
 
 /*
+ * PUBLIC: int __fop_file_remove_60_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC:     db_recops, void *));
+ */
+int
+__fop_file_remove_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;		/* dbtp->data is handed to the _read routine. */
+	DB_LSN *lsnp;
+	db_recops notused2;	/* Unused; quieted by COMPQUIET below. */
+	void *lvhp;		/* Cast to DB_LOG_VRFY_INFO * below. */
+{
+	__fop_file_remove_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret = __fop_file_remove_60_read(env, dbtp->data, &argp)) != 0)
+		return (ret);	/* Read failed; argp is not freed. */
+
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+	/* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+
+err:	/* NOTE(review): no goto is visible; presumably a macro target. */
+
+	__os_free(env, argp);
+
+	return (ret);
+}
+
+/*
  * PUBLIC: int __fop_file_remove_verify __P((ENV *, DBT *, DB_LSN *,
  * PUBLIC:     db_recops, void *));
  */
@@ -2485,7 +2732,7 @@ __fop_file_remove_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __fop_file_remove_read(env, dbtp->data, &argp)) != 0)
@@ -2519,7 +2766,7 @@ __ham_insdel_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2555,7 +2802,7 @@ __ham_newpage_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2592,7 +2839,7 @@ __ham_splitdata_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2629,7 +2876,7 @@ __ham_replace_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2667,7 +2914,7 @@ __ham_copypage_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2703,7 +2950,7 @@ __ham_metagroup_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2735,7 +2982,7 @@ __ham_metagroup_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2771,7 +3018,7 @@ __ham_groupalloc_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2807,7 +3054,7 @@ __ham_groupalloc_verify(env, dbtp, lsnp, notused2, lvhp)
 	ret = 0;
 	pflife = NULL;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2863,7 +3110,7 @@ __ham_changeslot_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2900,7 +3147,7 @@ __ham_contract_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2936,7 +3183,7 @@ __ham_curadj_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -2973,7 +3220,7 @@ __ham_chgpg_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3011,7 +3258,7 @@ __heap_addrem_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3030,6 +3277,40 @@ err:
 }
 
 /*
+ * PUBLIC: int __heap_addrem_60_verify
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_addrem_60_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;		/* dbtp->data is handed to the _read routine. */
+	DB_LSN *lsnp;
+	db_recops notused2;	/* Unused; quieted by COMPQUIET below. */
+	void *lvhp;		/* Cast to DB_LOG_VRFY_INFO * below. */
+{
+	__heap_addrem_60_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	int ret;
+
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+	if ((ret =
+	    __heap_addrem_60_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+		return (ret);	/* Read failed; argp is not freed. */
+
+	LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+	ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+	if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0)
+		goto err;	/* Control reaches err either way. */
+out:	/* NOTE(review): no goto out is visible; presumably a macro target. */
+
+err:
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
  * PUBLIC: int __heap_pg_alloc_verify
  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
  */
@@ -3045,7 +3326,7 @@ __heap_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3060,7 +3341,7 @@ out:
 
 err:
 	__os_free(env, argp);
-	return (ret);	
+	return (ret);
 }
 
 /*
@@ -3079,7 +3360,7 @@ __heap_trunc_meta_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3095,7 +3376,7 @@ out:
 err:
 
 	__os_free(env, argp);
-	return (ret);	
+	return (ret);
 }
 
 /*
@@ -3114,7 +3395,7 @@ __heap_trunc_page_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3150,7 +3431,7 @@ __qam_incfirst_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3186,7 +3467,7 @@ __qam_mvptr_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3222,7 +3503,7 @@ __qam_del_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3258,7 +3539,7 @@ __qam_add_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3294,7 +3575,7 @@ __qam_delext_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret =
@@ -3331,7 +3612,7 @@ __txn_regop_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __txn_regop_42_read(env, dbtp->data, &argp)) != 0)
@@ -3365,7 +3646,7 @@ __txn_regop_verify(env, dbtp, lsnp, notused2, lvhp)
 	VRFY_TIMESTAMP_INFO tsinfo;
 
 	ptvi = pptvi = NULL;
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 	ret = ret2 = started = 0;
 
@@ -3480,7 +3761,7 @@ __txn_ckp_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __txn_ckp_42_read(env, dbtp->data, &argp)) != 0)
@@ -3517,7 +3798,7 @@ __txn_ckp_verify(env, dbtp, lsnp, notused2, lvhp)
 	time_t ckp_time, lastckp_time;
 
 	lastckp = NULL;
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 	memset(&ckpinfo, 0, sizeof(ckpinfo));
 	memset(&cvp, 0, sizeof(cvp));
@@ -3675,7 +3956,7 @@ __txn_child_verify(env, dbtp, lsnp, notused2, lvhp)
 	 * we never know the T0 has an active child txn T1, all child txns
 	 * we know are committed.
 	 */
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 	ptvi = ptvi2 = NULL;
 	ret = ret2 = started = 0;
@@ -3811,7 +4092,7 @@ __txn_xa_regop_42_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __txn_xa_regop_42_read(env, dbtp->data, &argp)) != 0)
@@ -3844,7 +4125,7 @@ __txn_prepare_verify(env, dbtp, lsnp, notused2, lvhp)
 
 	ret = ret2 = started = 0;
 	ptvi = NULL;
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 
 	if ((ret = __txn_prepare_read(env, dbtp->data, &argp)) != 0)
@@ -3924,7 +4205,7 @@ __txn_recycle_verify(env, dbtp, lsnp, notused2, lvhp)
 	DB_LOG_VRFY_INFO *lvh;
 	int ret;
 
-	notused2 = DB_TXN_LOG_VERIFY;
+	COMPQUIET(notused2, DB_TXN_LOG_VERIFY);
 	lvh = (DB_LOG_VRFY_INFO *)lvhp;
 	ret = 0;
 
diff --git a/src/log/log_verify_stub.c b/src/log/log_verify_stub.c
index e6589a50..fdd9a795 100644
--- a/src/log/log_verify_stub.c
+++ b/src/log/log_verify_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/log/log_verify_util.c b/src/log/log_verify_util.c
index 88682921..b0cfe0cb 100644
--- a/src/log/log_verify_util.c
+++ b/src/log/log_verify_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -53,16 +53,16 @@
 	}								\
 } while (0)
 
-typedef int (*btcmp_funct)(DB *, const DBT *, const DBT *);
-typedef int (*dupcmp_funct)(DB *, const DBT *, const DBT *);
+typedef int (*btcmp_funct)(DB *, const DBT *, const DBT *, size_t *);
+typedef int (*dupcmp_funct)(DB *, const DBT *, const DBT *, size_t *);
 
 static int __lv_add_recycle_handler __P((
     DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, void *));
 static int __lv_add_recycle_lsn __P((VRFY_TXN_INFO *, const DB_LSN *));
 static size_t __lv_dbt_arrsz __P((const DBT *, u_int32_t));
-static int __lv_fidpgno_cmp __P((DB *, const DBT *, const DBT *));
-static int __lv_i32_cmp __P((DB *, const DBT *, const DBT *));
-static int __lv_lsn_cmp __P((DB *, const DBT *, const DBT *));
+static int __lv_fidpgno_cmp __P((DB *, const DBT *, const DBT *, size_t *));
+static int __lv_i32_cmp __P((DB *, const DBT *, const DBT *, size_t *));
+static int __lv_lsn_cmp __P((DB *, const DBT *, const DBT *, size_t *));
 static void __lv_on_bdbop_err __P((int));
 static int __lv_open_db __P((DB_ENV *, DB **, DB_THREAD_INFO *,
     const char *, int, btcmp_funct, u_int32_t, dupcmp_funct));
@@ -73,8 +73,8 @@ static int __lv_seccbk_fname __P((DB *, const DBT *, const DBT *, DBT *));
 static int __lv_seccbk_lsn __P((DB *, const DBT *, const DBT *, DBT *));
 static int __lv_seccbk_txnpg __P((DB *, const DBT *, const DBT *, DBT *));
 static void __lv_setup_logtype_names __P((DB_LOG_VRFY_INFO *lvinfo));
-static int __lv_txnrgns_lsn_cmp __P((DB *, const DBT *, const DBT *));
-static int __lv_ui32_cmp __P((DB *, const DBT *, const DBT *));
+static int __lv_txnrgns_lsn_cmp __P((DB *, const DBT *, const DBT *, size_t *));
+static int __lv_ui32_cmp __P((DB *, const DBT *, const DBT *, size_t *));
 static int __lv_unpack_txn_vrfy_info __P((VRFY_TXN_INFO **, const DBT *));
 static int __lv_unpack_filereg __P((const DBT *, VRFY_FILEREG_INFO **));
 
@@ -383,16 +383,18 @@ err:
 
 /* Btree compare function for a [fileid, pgno] key. */
 static int
-__lv_fidpgno_cmp(db, dbt1, dbt2)
+__lv_fidpgno_cmp(db, dbt1, dbt2, locp)
 	DB *db;
 	const DBT *dbt1;
 	const DBT *dbt2;
+	size_t * locp;
 {
 	db_pgno_t pgno1, pgno2;
 	int ret;
 	size_t len;
 
 	COMPQUIET(db, NULL);
+	COMPQUIET(locp, NULL);
 	len = DB_FILE_ID_LEN;
 	ret = memcmp(dbt1->data, dbt2->data, len);
 	if (ret == 0) {
@@ -408,14 +410,16 @@ __lv_fidpgno_cmp(db, dbt1, dbt2)
 
 /* Btree compare function for a int32_t type of key. */
 static int
-__lv_i32_cmp(db, dbt1, dbt2)
+__lv_i32_cmp(db, dbt1, dbt2, locp)
 	DB *db;
 	const DBT *dbt1;
 	const DBT *dbt2;
+	size_t *locp;
 {
 	int32_t k1, k2;
 
 	COMPQUIET(db, NULL);
+	COMPQUIET(locp, NULL);
 	memcpy(&k1, dbt1->data, sizeof(k1));
 	memcpy(&k2, dbt2->data, sizeof(k2));
 
@@ -424,14 +428,16 @@ __lv_i32_cmp(db, dbt1, dbt2)
 
 /* Btree compare function for a u_int32_t type of key. */
 static int
-__lv_ui32_cmp(db, dbt1, dbt2)
+__lv_ui32_cmp(db, dbt1, dbt2, locp)
 	DB *db;
 	const DBT *dbt1;
 	const DBT *dbt2;
+	size_t *locp;
 {
 	u_int32_t k1, k2;
 
 	COMPQUIET(db, NULL);
+	COMPQUIET(locp, NULL);
 	memcpy(&k1, dbt1->data, sizeof(k1));
 	memcpy(&k2, dbt2->data, sizeof(k2));
 
@@ -440,18 +446,21 @@ __lv_ui32_cmp(db, dbt1, dbt2)
 
 /* Btree compare function for a DB_LSN type of key. */
 static int
-__lv_lsn_cmp(db, dbt1, dbt2)
+__lv_lsn_cmp(db, dbt1, dbt2, locp)
 	DB *db;
 	const DBT *dbt1;
 	const DBT *dbt2;
+	size_t *locp;		/* Not used by this comparator. */
 {
 	DB_LSN lsn1, lsn2;
 
+	COMPQUIET(locp, NULL);
 	DB_ASSERT(db->env, dbt1->size == sizeof(DB_LSN));
 	DB_ASSERT(db->env, dbt2->size == sizeof(DB_LSN));
 	memcpy(&lsn1, dbt1->data, sizeof(DB_LSN));
 	memcpy(&lsn2, dbt2->data, sizeof(DB_LSN));
 
+	COMPQUIET(db, NULL);	/* Placed after the asserts, which read db. */
 	return (LOG_COMPARE(&lsn1, &lsn2));
 }
 
@@ -1663,17 +1672,21 @@ int __put_timestamp_info (lvinfo, tsinfo)
 }
 
 static int
-__lv_txnrgns_lsn_cmp (db, d1, d2)
+__lv_txnrgns_lsn_cmp (db, d1, d2, locp)
 	DB *db;
 	const DBT *d1, *d2;
+	size_t *locp;		/* Not used by this comparator. */
 {
 	struct __lv_txnrange r1, r2;
 
+	COMPQUIET(locp, NULL);
+
 	DB_ASSERT(db->env, d1->size == sizeof(r1));
 	DB_ASSERT(db->env, d2->size == sizeof(r2));
 	memcpy(&r1, d1->data, d1->size);
 	memcpy(&r2, d2->data, d2->size);
 
+	COMPQUIET(db, NULL);	/* Placed after the asserts, which read db. */
 	return (LOG_COMPARE(&(r1.end), &(r2.end)));
 }
 
diff --git a/src/mp/mp_alloc.c b/src/mp/mp_alloc.c
index dc331215..011f54c6 100644
--- a/src/mp/mp_alloc.c
+++ b/src/mp/mp_alloc.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -22,8 +22,112 @@
 #endif
 
 /*
+ * __memp_bh_unreachable --
+ *
+ *	Determine whether this buffer can not ever be seen again: is the next
+ *	newer version visible to the same transaction which sees this one?
+ *	If both versions are visible to the same transaction, there is no
+ *	reason to keep the older one: it can be purged.
+ *
+ *	If this buffer has a more recent version, and there is a transaction
+ *	with a read_lsn between this buffer's and that more recent version's,
+ *	the buffer is visible to at least that transaction, so return FALSE.
+ *	Otherwise return TRUE.
+ *
+ *	txns:	   3/10		       2/10	   2/5 2/1          1/10
+ *	vers: 3/15	 2/15  2/14    2/10   2/8	     1/150
+ *	      vis	 vis  unreach   vis  unreach	     vis
+ *	who  new txns	 3/10	       2/10		    2/5, 2/1
+ *	sees
+ *
+ *	Note: in the above example, the page was allocated after txn 1/10
+ *	started. 1/10 would not see any version of the page.
+ *
+ * PUBLIC: int __memp_bh_unreachable __P((ENV *, BH *, DB_LSN *, int));
+ */
+int
+__memp_bh_unreachable(env, bhp, snapshots, n_snapshots)
+	ENV *env;
+	BH *bhp;
+	DB_LSN *snapshots;	/* Reader LSNs, scanned from index 0. */
+	int n_snapshots;	/* Number of entries in snapshots[]. */
+{
+	BH *newer_bhp;
+	DB_LSN b_vlsn, n_vlsn;
+	int i, ret;
+#ifdef DIAGNOSTIC
+	DB_MPOOL *dbmp;
+	DB_MSGBUF mb;
+	MPOOLFILE *bh_mfp;
+#endif
+
+	/*
+	 * The buffer can't be purged if it is being used, or is the most recent
+	 * version, or the next newer version isn't a copy yet.
+	 */
+	if (BH_REFCOUNT(bhp) != 0 ||
+	    (newer_bhp = SH_CHAIN_NEXT(bhp, vc, __bh)) == NULL ||
+	    newer_bhp->td_off == INVALID_ROFF)
+		return (FALSE);
+
+	/*
+	 * Find the visibility LSNs for this buffer (b_vlsn) and the more
+	 * recent, newer buffer (n_vlsn). If the newer version hasn't
+	 * committed yet the bhp could be needed.
+	 */
+	n_vlsn = *VISIBLE_LSN(env, newer_bhp);
+	if (IS_MAX_LSN(n_vlsn))
+		return (FALSE);
+	if (bhp->td_off == INVALID_ROFF)
+		INIT_LSN(b_vlsn);
+	else
+		b_vlsn = *VISIBLE_LSN(env, bhp);
+
+	ret = TRUE;		/* Purgeable unless a reader needs bhp. */
+	/*
+	 * Look for a transaction which is between n_vlsn and b_vlsn,
+	 * determining that bhp is reachable. Stop looking once the
+	 * transactions get so small (old) that they precede the buffer's
+	 * version; no earlier txn could be between n_vlsn and b_vlsn.
+	 */
+	for (i = 0;
+	     i < n_snapshots && LOG_COMPARE(&snapshots[i], &b_vlsn) >= 0;
+	     i++) {
+		if (LOG_COMPARE(&snapshots[i], &n_vlsn) < 0) {
+			/*
+			 * This txn can see (started after) bhp, but not
+			 * newer_bhp (which committed after this txn started).
+			 */
+			ret = FALSE;
+			break;
+		}
+	}
+
+#ifdef DIAGNOSTIC
+	if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC)) {
+		dbmp = env->mp_handle;
+		bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
+		DB_MSGBUF_INIT(&mb);
+		__db_msgadd(env, &mb,
+    "bh_unreachable %s pgno %d %s %lu/%lu %x newer %lu/%lu txn #%d in\n",
+		    __memp_fns(dbmp, bh_mfp), bhp->pgno,
+		    ret ? "purgeable" : "needed",
+		    (u_long)b_vlsn.file, (u_long)b_vlsn.offset, bhp->flags,
+		    (u_long)n_vlsn.file, (u_long)n_vlsn.offset, i);
+		for (i = 0; i != n_snapshots; i++)
+			__db_msgadd(env, &mb, " %lu/%lu",
+			    (u_long)snapshots[i].file,
+			    (u_long)snapshots[i].offset);
+		DB_MSGBUF_FLUSH(env, &mb);
+	}
+#endif
+	return (ret);	/* TRUE: purgeable; FALSE: still needed. */
+}
+
+/*
  * __memp_alloc --
- *	Allocate some space from a cache region.
+ *	Allocate some space from a cache region. If the region is full then
+ *	reuse one or more cache buffers.
  *
  * PUBLIC: int __memp_alloc __P((DB_MPOOL *,
  * PUBLIC:     REGINFO *, MPOOLFILE *, size_t, roff_t *, void *));
@@ -39,7 +143,7 @@ __memp_alloc(dbmp, infop, mfp, len, offsetp, retp)
 {
 	BH *bhp, *current_bhp, *mvcc_bhp, *oldest_bhp;
 	BH_FROZEN_PAGE *frozen_bhp;
-	DB_LSN oldest_reader, vlsn;
+	DB_LSN *snapshots, vlsn;
 	DB_MPOOL_HASH *dbht, *hp, *hp_end, *hp_saved, *hp_tmp;
 	ENV *env;
 	MPOOL *c_mp;
@@ -49,7 +153,7 @@ __memp_alloc(dbmp, infop, mfp, len, offsetp, retp)
 	u_int32_t dirty_eviction, high_priority, priority, versions;
 	u_int32_t priority_saved, put_counter, lru_generation, total_buckets;
 	int aggressive, alloc_freeze, b_lock, giveup;
-	int h_locked, need_free, obsolete, ret, write_error;
+	int h_locked, need_free, n_snapshots, obsolete, ret, write_error;
 	u_int8_t *endp;
 	void *p;
 
@@ -58,11 +162,10 @@ __memp_alloc(dbmp, infop, mfp, len, offsetp, retp)
 	dbht = R_ADDR(infop, c_mp->htab);
 	hp_end = &dbht[c_mp->htab_buckets];
 	hp_saved = NULL;
-	priority_saved = 0;
-	write_error = 0;
-
+	snapshots = NULL;
+	priority_saved = write_error = 0;
 	buckets = buffers = put_counter = total_buckets = versions = 0;
-	aggressive = alloc_freeze = giveup = h_locked = 0;
+	aggressive = alloc_freeze = giveup = h_locked = n_snapshots = 0;
 
 	/*
 	 * If we're allocating a buffer, and the one we're discarding is the
@@ -138,13 +241,15 @@ found:		if (offsetp != NULL)
 			    c_mp->stat.st_alloc_pages, buffers, infop->id);
 		}
 #endif
-		return (0);
+		goto done;
 	} else if (giveup || c_mp->pages == 0) {
 		MPOOL_REGION_UNLOCK(env, infop);
 
 		__db_errx(env, DB_STR("3017",
 		    "unable to allocate space from the buffer cache"));
-		return ((ret == ENOMEM && write_error != 0) ? EIO : ret);
+		if (ret == ENOMEM && write_error != 0)
+			ret = EIO;
+		goto done;
 	}
 
 search:
@@ -158,7 +263,6 @@ search:
 	lru_generation = c_mp->lru_generation;
 
 	ret = 0;
-	MAX_LSN(oldest_reader);
 
 	/*
 	 * We re-attempt the allocation every time we've freed 3 times what
@@ -222,6 +326,13 @@ search:
 				goto alloc;
 			MPOOL_REGION_UNLOCK(env, infop);
 
+			/* Refresh the list of mvcc reader transactions. */
+			if (snapshots != NULL)
+				__os_free(env, snapshots);
+			if ((ret = __txn_get_readers(
+			    env, &snapshots, &n_snapshots)) != 0)
+				goto err;
+
 			aggressive++;
 			/*
 			 * Once aggressive, we consider all buffers. By setting
@@ -266,13 +377,6 @@ search:
 		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
 			continue;
 
-		/* Set aggressive if we have already searched for too long. */
-		if (aggressive == 0 && buckets >= MPOOL_ALLOC_SEARCH_LIMIT) {
-			aggressive = 1;
-			/* Once aggressive, we consider all buffers. */
-			high_priority = MPOOL_LRU_MAX;
-		}
-
 		/* Unlock the region and lock the hash bucket. */
 		MPOOL_REGION_UNLOCK(env, infop);
 		MUTEX_READLOCK(env, hp->mtx_hash);
@@ -280,29 +384,45 @@ search:
 		b_lock = 0;
 
 		/*
+		 * Set aggressive to consider all buffers if we have already
+		 * searched in too many buckets.
+		 */
+		if (buckets > MPOOL_ALLOC_SEARCH_LIMIT && aggressive == 0) {
+			aggressive = 1;
+			/* Once aggressive, we consider all buffers. */
+			high_priority = MPOOL_LRU_MAX;
+			if (snapshots == NULL && (ret = __txn_get_readers(
+			    env, &snapshots, &n_snapshots)) != 0)
+				goto err;
+		}
+
+		/*
 		 * Find a buffer we can use.
+		 * Skip over refcount > 0 buffers; we can't get rid of them.
 		 *
-		 * We use the lowest-LRU singleton buffer if we find one and
-		 * it's better than the result of another hash bucket we've
+		 * Without MVCC we use the lowest-LRU singleton buffer we find
+		 * that's better than the result of another hash bucket we've
 		 * reviewed.  We do not use a buffer which has a priority
 		 * greater than high_priority unless we are being aggressive.
 		 *
-		 * With MVCC buffers, the situation is more complicated: we
-		 * don't want to free a buffer out of the middle of an MVCC
-		 * chain, since that requires I/O.  So, walk the buffers,
-		 * looking for an obsolete buffer at the end of an MVCC chain.
-		 * Once a buffer becomes obsolete, its LRU priority is
-		 * irrelevant because that version can never be accessed again.
+		 * MVCC requires looking at additional factors: we don't want to
+		 * free a still-relevant buffer out of the middle of an MVCC
+		 * chain, since that requires freezing - lots of I/O.  So,
+		 * walk the buffers, looking for an obsolete buffer at the
+		 * end of the MVCC chain. Once a buffer becomes obsolete, its
+		 * LRU priority is irrelevant because that version can never
+		 * be accessed again.
 		 *
 		 * If we don't find any obsolete MVCC buffers, we will get
 		 * aggressive, and in that case consider the lowest priority
 		 * buffer within a chain.
-		 *
-		 * Ignore referenced buffers, we can't get rid of them.
 		 */
 retry_search:	bhp = NULL;
 		bucket_priority = high_priority;
 		obsolete = 0;
+		if (n_snapshots > 0 && LOG_COMPARE(&snapshots[n_snapshots - 1],
+		    &hp->old_reader) > 0)
+			hp->old_reader = snapshots[n_snapshots - 1];
 		SH_TAILQ_FOREACH(current_bhp, &hp->hash_bucket, hq, __bh) {
 			/*
 			 * First, do the standard LRU check for singletons.
@@ -340,55 +460,63 @@ retry_search:	bhp = NULL;
 			    mvcc_bhp != NULL;
 			    oldest_bhp = mvcc_bhp,
 			    mvcc_bhp = SH_CHAIN_PREV(mvcc_bhp, vc, __bh)) {
+				DB_ASSERT(env, mvcc_bhp !=
+				    SH_CHAIN_PREV(mvcc_bhp, vc, __bh));
 #ifdef MPOOL_ALLOC_SEARCH_DYN
 				if (aggressive == 0 &&
-				     ++high_priority >= c_mp->lru_priority)
+				     ++high_priority >= c_mp->lru_priority) {
 					aggressive = 1;
+					if (snapshots == NULL && (ret =
+					    __txn_get_readers(env,
+					    &snapshots, &n_snapshots)) != 0)
+						goto err;
+				}
 #endif
-				DB_ASSERT(env, mvcc_bhp !=
-				    SH_CHAIN_PREV(mvcc_bhp, vc, __bh));
-				if ((aggressive < 2 &&
-				    ++versions < (buffers >> 2)) ||
-				    BH_REFCOUNT(mvcc_bhp) != 0)
+				if (n_snapshots > 0 &&
+				    __memp_bh_unreachable(env,
+				    mvcc_bhp, snapshots, n_snapshots)) {
+					oldest_bhp = mvcc_bhp;
+					goto is_obsolete;
+				}
+				if (bhp != NULL &&
+				    mvcc_bhp->priority >= bhp->priority)
+					continue;
+				if (BH_REFCOUNT(mvcc_bhp) != 0)
+					continue;
+				/*
+				 * Since taking still-relevant versions requires
+				 * freezing, skip over them at low aggression
+				 * levels unless we see that a high proportion
+				 * of buffers (over 1/4) are MVCC copies.
+				 */
+				if (aggressive < 2 &&
+				    ++versions < (buffers >> 2))
 					continue;
 				buffers++;
-				if (!F_ISSET(mvcc_bhp, BH_FROZEN) &&
-				    (bhp == NULL ||
-				    bhp->priority > mvcc_bhp->priority)) {
-					if (bhp != NULL)
-						atomic_dec(env, &bhp->ref);
-					bhp = mvcc_bhp;
-					atomic_inc(env, &bhp->ref);
-				}
+				if (F_ISSET(mvcc_bhp, BH_FROZEN))
+					continue;
+				/*
+				 * Select mvcc_bhp as current best candidate,
+				 * releasing the current candidate, if any.
+				 */
+				if (bhp != NULL)
+					atomic_dec(env, &bhp->ref);
+				bhp = mvcc_bhp;
+				atomic_inc(env, &bhp->ref);
 			}
 
 			/*
 			 * oldest_bhp is the last buffer on the MVCC chain, and
 			 * an obsolete buffer at the end of the MVCC chain gets
-			 * used without further search. Before checking for
-			 * obsolescence, update the cached oldest reader LSN in
-			 * the bucket if it is older than call's oldest_reader.
+			 * used without further search.
 			 */
 			if (BH_REFCOUNT(oldest_bhp) != 0)
 				continue;
 
-			if (LOG_COMPARE(&oldest_reader, &hp->old_reader) > 0) {
-				if (IS_MAX_LSN(oldest_reader) &&
-				   (ret = __txn_oldest_reader(
-				    env, &oldest_reader)) != 0) {
-					MUTEX_UNLOCK(env, hp->mtx_hash);
-					if (bhp != NULL)
-						atomic_dec(env, &bhp->ref);
-					return (ret);
-				}
-				if (LOG_COMPARE(&oldest_reader,
-				    &hp->old_reader) > 0)
-					hp->old_reader = oldest_reader;
-			}
-
 			if (BH_OBSOLETE(oldest_bhp, hp->old_reader, vlsn)) {
 				if (aggressive < 2)
 					buffers++;
+is_obsolete:
 				obsolete = 1;
 				if (bhp != NULL)
 					atomic_dec(env, &bhp->ref);
@@ -410,10 +538,18 @@ retry_search:	bhp = NULL;
 
 		/*
 		 * Compare two hash buckets and select the one with the lower
-		 * priority. Performance testing showed looking at two improves
-		 * the LRU-ness and looking at more only does a little better.
+		 * priority, except mvcc at high aggression levels. Performance
+		 * testing shows looking at two improves the LRU-ness and
+		 * looking at more only does a little better.
 		 */
 		if (hp_saved == NULL) {
+			/*
+			 * At high aggressive levels when mvcc is active, stop
+			 * looking for candidate once one has been found.
+			 * Freezing takes more time than writing out to a db.
+			 */
+			if (aggressive > 1 && n_snapshots > 1)
+				goto this_buffer;
 			hp_saved = hp;
 			priority_saved = priority;
 			goto next_hb;
@@ -487,11 +623,15 @@ this_buffer:	/*
 
 		/* We cannot block as the caller is probably holding locks. */
 		if ((ret = MUTEX_TRYLOCK(env, bhp->mtx_buf)) != 0) {
-			if (ret != DB_LOCK_NOTGRANTED)
-				return (ret);
+			if (ret != DB_LOCK_NOTGRANTED) {
+				goto err;
+			}
+			ret = 0;
 			goto next_hb;
 		}
 		F_SET(bhp, BH_EXCLUSIVE);
+		if (obsolete)
+			F_SET(bhp, BH_UNREACHABLE);
 		b_lock = 1;
 
 		/* Someone may have grabbed it while we got the lock. */
@@ -557,7 +697,7 @@ this_buffer:	/*
 				F_CLR(bhp, BH_EXCLUSIVE);
 				MUTEX_UNLOCK(env, bhp->mtx_buf);
 				DB_ASSERT(env, !h_locked);
-				return (ret);
+				goto err;
 			}
 		}
 
@@ -573,16 +713,25 @@ this_buffer:	/*
 		if (BH_REFCOUNT(bhp) != 1 || F_ISSET(bhp, BH_DIRTY) ||
 		    (SH_CHAIN_HASNEXT(bhp, vc) &&
 		    SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off != bhp->td_off &&
-		    !BH_OBSOLETE(bhp, hp->old_reader, vlsn)))
+		    !(obsolete || BH_OBSOLETE(bhp, hp->old_reader, vlsn)))) {
+			if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC))
+				__db_msg(env,
+		    "memp_alloc next_hb past bhp %lx flags %x ref %d %lx/%lx",
+				    (u_long)R_OFFSET(infop, bhp), bhp->flags,
+				    BH_REFCOUNT(bhp),
+			(u_long)R_OFFSET(infop, SH_CHAIN_NEXTP(bhp, vc, __bh)),
+			(u_long)R_OFFSET(infop, SH_CHAIN_PREVP(bhp, vc, __bh)));
 			goto next_hb;
+		}
 
 		/*
 		 * If the buffer is frozen, thaw it and look for another one
-		 * we can use. (Calling __memp_bh_freeze above will not
-		 * mark bhp BH_FROZEN.)
+		 * we can use. (Calling __memp_bh_freeze above will not mark
+		 * this bhp BH_FROZEN; it creates another frozen one.)
 		 */
 		if (F_ISSET(bhp, BH_FROZEN)) {
-			DB_ASSERT(env, obsolete || SH_CHAIN_SINGLETON(bhp, vc));
+			DB_ASSERT(env, SH_CHAIN_SINGLETON(bhp, vc) ||
+			    obsolete || BH_OBSOLETE(bhp, hp->old_reader, vlsn));
 			DB_ASSERT(env, BH_REFCOUNT(bhp) > 0);
 			if (!F_ISSET(bhp, BH_THAWED)) {
 				/*
@@ -592,10 +741,10 @@ this_buffer:	/*
 				 */
 				if ((ret = __memp_bh_thaw(dbmp,
 				    infop, hp, bhp, NULL)) != 0)
-					return (ret);
+					goto done;
 				MUTEX_READLOCK(env, hp->mtx_hash);
 			} else {
-				need_free = (atomic_dec(env, &bhp->ref) == 0);
+				need_free = atomic_dec(env, &bhp->ref) == 0;
 				F_CLR(bhp, BH_EXCLUSIVE);
 				MUTEX_UNLOCK(env, bhp->mtx_buf);
 				if (need_free) {
@@ -626,7 +775,10 @@ this_buffer:	/*
 		if (alloc_freeze) {
 			if ((ret = __memp_bhfree(dbmp,
 			     infop, bh_mfp, hp, bhp, 0)) != 0)
-				return (ret);
+				goto err;
+			DB_ASSERT(env, bhp->mtx_buf != MUTEX_INVALID);
+			if ((ret = __mutex_free(env, &bhp->mtx_buf)) != 0)
+				goto err;
 			b_lock = 0;
 			h_locked = 0;
 
@@ -654,23 +806,21 @@ this_buffer:	/*
 		}
 
 		/*
-		 * Check to see if the buffer is the size we're looking for.
-		 * If so, we can simply reuse it.  Otherwise, free the buffer
-		 * and its space and keep looking.
+		 * If the buffer is the size we're looking for, we can simply
+		 * reuse it. Otherwise, free it and keep looking.
 		 */
 		if (mfp != NULL && mfp->pagesize == bh_mfp->pagesize) {
 			if ((ret = __memp_bhfree(dbmp,
 			     infop, bh_mfp, hp, bhp, 0)) != 0)
-				return (ret);
+				goto err;
 			p = bhp;
 			goto found;
 		}
 
 		freed_space += sizeof(*bhp) + bh_mfp->pagesize;
-		if ((ret =
-		    __memp_bhfree(dbmp, infop,
-			 bh_mfp, hp, bhp, BH_FREE_FREEMEM)) != 0)
-			return (ret);
+		if ((ret = __memp_bhfree(dbmp,
+		    infop, bh_mfp, hp, bhp, BH_FREE_FREEMEM)) != 0)
+			goto err;
 
 		/* Reset "aggressive" and "write_error" if we free any space. */
 		if (aggressive > 1)
@@ -689,12 +839,14 @@ next_hb:		if (bhp != NULL) {
 				if (b_lock) {
 					F_CLR(bhp, BH_EXCLUSIVE);
 					MUTEX_UNLOCK(env, bhp->mtx_buf);
+					b_lock = 0;
 				}
 			}
 			if (h_locked)
 				MUTEX_UNLOCK(env, hp->mtx_hash);
 			h_locked = 0;
 		}
+		obsolete = 0;
 		MPOOL_REGION_LOCK(env, infop);
 
 		/*
@@ -706,7 +858,15 @@ next_hb:		if (bhp != NULL) {
 		if (freed_space >= 3 * len)
 			goto alloc;
 	}
-	/* NOTREACHED */
+err:
+	if (h_locked) {
+		MUTEX_UNLOCK(env, hp->mtx_hash);
+		h_locked = 0;
+	}
+done:
+	if (snapshots != NULL)
+		__os_free(env, snapshots);
+	return (ret);
 }
 
 /*
diff --git a/src/mp/mp_backup.c b/src/mp/mp_backup.c
index f376cda7..f1072292 100644
--- a/src/mp/mp_backup.c
+++ b/src/mp/mp_backup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -145,6 +145,9 @@ __memp_backup_mpf(env, mpf, ip, first_pgno, last_pgno, fp, handle, flags)
 
 	if (backup == NULL || (len = backup->size) == 0)
 		len = MEGABYTE;
+	/* Ensure backup page size is at least as big as db page size */
+	if (len < mfp->pagesize)
+		len = mfp->pagesize;
 	if ((ret = __os_malloc(env, len, &buf)) != 0)
 		return (ret);
 	write_size = (u_int32_t)(len / mfp->pagesize);
@@ -188,7 +191,7 @@ __memp_backup_mpf(env, mpf, ip, first_pgno, last_pgno, fp, handle, flags)
 
 		if (backup != NULL && backup->write != NULL) {
 			if ((ret = backup->write(
-			     env->dbenv, gigs, off, (u_int32_t)nr, 
+			     env->dbenv, gigs, off, (u_int32_t)nr,
 			     buf, handle)) != 0)
 				break;
 		} else {
diff --git a/src/mp/mp_bh.c b/src/mp/mp_bh.c
index 1df8e206..30293f29 100644
--- a/src/mp/mp_bh.c
+++ b/src/mp/mp_bh.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -157,7 +157,7 @@ __memp_bhwrite(dbmp, hp, mfp, bhp, open_extents)
 	opened = 1;
 	if ((ret = __memp_fopen(dbmfp, mfp, NULL,
 	    NULL, DB_FLUSH | DB_DURABLE_UNKNOWN, 0, mfp->pagesize)) != 0) {
-	    	dbmfp->ref--;
+		dbmfp->ref--;
 		(void)__memp_fclose(dbmfp, 0);
 
 		/*
@@ -264,7 +264,7 @@ __memp_pgread(dbmfp, bhp, can_create)
 		 * how to handle the error.
 		 */
 		if (!can_create) {
-			ret = DB_PAGE_NOTFOUND;
+			ret = USR_ERR(env, DB_PAGE_NOTFOUND);
 			goto err;
 		}
 
@@ -557,6 +557,9 @@ err:	__db_errx(env, DB_STR_A("3016",
  * __memp_bhfree --
  *	Free a bucket header and its referenced data.
  *
+ *	The hash bucket is unlocked before returning except when flags includes
+ *	BH_FREE_UNLOCKED -- or there was no hp passed in to begin with.
+ *
  * PUBLIC: int __memp_bhfree __P((DB_MPOOL *,
  * PUBLIC:	REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t));
  */
@@ -600,10 +603,13 @@ __memp_bhfree(dbmp, infop, mfp, hp, bhp, flags)
 	    (SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off == bhp->td_off ||
 	    bhp->td_off == INVALID_ROFF ||
 	    IS_MAX_LSN(*VISIBLE_LSN(env, bhp)) ||
+	    F_ISSET(bhp, BH_UNREACHABLE) ||
 	    BH_OBSOLETE(bhp, hp->old_reader, vlsn))));
 
 	PERFMON3(env, mpool, evict, __memp_fns(dbmp, mfp), bhp->pgno, bhp);
-
+	if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC))
+		__db_msg(env, "bhfree pgno %lu roff %lx",
+		    (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp->reginfo, bhp));
 	/*
 	 * Delete the buffer header from the hash bucket queue or the
 	 * version chain.
diff --git a/src/mp/mp_fget.c b/src/mp/mp_fget.c
index 5f9a4bf9..270135bd 100644
--- a/src/mp/mp_fget.c
+++ b/src/mp/mp_fget.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -53,15 +53,19 @@ __memp_fget_pp(dbmfp, pgnoaddr, txnp, flags, addrp)
 	 * time, which we don't want to do because one of our big goals in life
 	 * is to keep database files small.  It's sleazy as hell, but we catch
 	 * any attempt to actually write the file in memp_fput().
+	 *
+	 * CREATE, LAST, and NEW are mutually exclusive. DIRTY and EDIT are also
+	 * mutually exclusive - that is checked in __memp_fget() itself.
 	 */
+#undef	OKMODE
 #undef	OKFLAGS
-#define	OKFLAGS		(DB_MPOOL_CREATE | DB_MPOOL_DIRTY | \
-	    DB_MPOOL_EDIT | DB_MPOOL_LAST | DB_MPOOL_NEW)
+#define	OKMODE	(DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
+#define	OKFLAGS	(OKMODE | DB_MPOOL_DIRTY | DB_MPOOL_EDIT)
 	if (flags != 0) {
 		if ((ret = __db_fchk(env, "memp_fget", flags, OKFLAGS)) != 0)
 			return (ret);
 
-		switch (FLD_CLR(flags, DB_MPOOL_DIRTY | DB_MPOOL_EDIT)) {
+		switch (FLD_ISSET(flags, OKMODE)) {
 		case DB_MPOOL_CREATE:
 		case DB_MPOOL_LAST:
 		case DB_MPOOL_NEW:
@@ -131,6 +135,7 @@ __memp_fget(dbmfp, pgnoaddr, ip, txn, flags, addrp)
 #ifdef DIAGNOSTIC
 	DB_LOCKTAB *lt;
 	DB_LOCKER *locker;
+	int pagelock_err;
 #endif
 
 	*(void **)addrp = NULL;
@@ -274,7 +279,7 @@ retry:		MUTEX_LOCK(env, hp->mtx_hash);
 			 * the BTREE in a subsequent txn).
 			 */
 			if (bhp == NULL) {
-				ret = DB_PAGE_NOTFOUND;
+				ret = USR_ERR(env, DB_PAGE_NOTFOUND);
 				goto err;
 			}
 		}
@@ -303,7 +308,10 @@ retry:		MUTEX_LOCK(env, hp->mtx_hash);
 		MUTEX_UNLOCK(env, hp->mtx_hash);
 		h_locked = 0;
 		if (dirty || extending || makecopy || F_ISSET(bhp, BH_FROZEN)) {
-xlatch:			if (LF_ISSET(DB_MPOOL_TRY)) {
+#ifdef HAVE_SHARED_LATCHES
+xlatch:
+#endif
+			if (LF_ISSET(DB_MPOOL_TRY)) {
 				if ((ret =
 				    MUTEX_TRYLOCK(env, bhp->mtx_buf)) != 0)
 					goto err;
@@ -373,11 +381,11 @@ thawed:			need_free = (atomic_dec(env, &bhp->ref) == 0);
 			bhp = NULL;
 			goto retry;
 		} else if (dirty && SH_CHAIN_HASNEXT(bhp, vc)) {
-			ret = DB_LOCK_DEADLOCK;
+			ret = USR_ERR(env, DB_LOCK_DEADLOCK);
 			goto err;
 		} else if (F_ISSET(bhp, BH_FREED) && flags != DB_MPOOL_CREATE &&
 		    flags != DB_MPOOL_NEW && flags != DB_MPOOL_FREE) {
-			ret = DB_PAGE_NOTFOUND;
+			ret = USR_ERR(env, DB_PAGE_NOTFOUND);
 			goto err;
 		}
 
@@ -508,9 +516,13 @@ revive:			if (F_ISSET(bhp, BH_FREED))
 			/*
 			 * With multiversion databases, we might need to
 			 * allocate a new buffer into which we can copy the one
-			 * that we found.  In that case, check the last buffer
+			 * that we found.  In that case, check the old versions
 			 * in the chain to see whether we can reuse an obsolete
-			 * buffer.
+			 * or unreachable buffer. First see whether the oldest
+			 * version is truly obsolete. If not, look for somewhat
+			 * more recent versions which are no longer needed
+			 * because the snapshot transactions which once could
+			 * have seen them have now exited.
 			 *
 			 * To provide snapshot isolation, we need to make sure
 			 * that we've seen a buffer older than the oldest
@@ -523,24 +535,17 @@ reuse:			if ((makecopy || F_ISSET(bhp, BH_FROZEN)) &&
 			}
 			if ((makecopy || F_ISSET(bhp, BH_FROZEN)) &&
 			    SH_CHAIN_HASPREV(bhp, vc)) {
-				oldest_bhp = SH_CHAIN_PREVP(bhp, vc, __bh);
-				while (SH_CHAIN_HASPREV(oldest_bhp, vc))
-					oldest_bhp = SH_CHAIN_PREVP(
-					    oldest_bhp, vc, __bh);
-
-				if (BH_REFCOUNT(oldest_bhp) == 0 &&
-				    !BH_OBSOLETE(
-				    oldest_bhp, hp->old_reader, vlsn) &&
-				    (ret = __txn_oldest_reader(env,
-				    &hp->old_reader)) != 0)
+				if ((ret = __memp_find_obsolete_version(env,
+				    bhp, hp, &oldest_bhp)) != 0)
 					goto err;
-
-				if (BH_OBSOLETE(
-				    oldest_bhp, hp->old_reader, vlsn) &&
-				    BH_REFCOUNT(oldest_bhp) == 0) {
+				if (oldest_bhp != NULL) {
 					DB_ASSERT(env,
 					    !F_ISSET(oldest_bhp, BH_DIRTY));
 					atomic_inc(env, &oldest_bhp->ref);
+#ifdef HAVE_STATISTICS
+					if (SH_CHAIN_HASPREV(oldest_bhp, vc))
+						c_mp->stat.st_mvcc_reused++;
+#endif
 					if (F_ISSET(oldest_bhp, BH_FROZEN)) {
 						/*
 						 * This call will release the
@@ -606,7 +611,7 @@ newpg:		/*
 			    mfp->last_pgno >= mfp->maxpgno) {
 				__db_errx(env, DB_STR_A("3023",
 				    "%s: file limited to %lu pages", "%s %lu"),
-				    __memp_fn(dbmfp), (u_long)mfp->maxpgno);
+				    __memp_fn(dbmfp), (u_long)mfp->maxpgno + 1);
 				ret = ENOSPC;
 			} else
 				*pgnoaddr = mfp->last_pgno + 1;
@@ -615,7 +620,7 @@ newpg:		/*
 			if (mfp->maxpgno != 0 && *pgnoaddr > mfp->maxpgno) {
 				__db_errx(env, DB_STR_A("3024",
 				    "%s: file limited to %lu pages", "%s %lu"),
-				    __memp_fn(dbmfp), (u_long)mfp->maxpgno);
+				    __memp_fn(dbmfp), (u_long)mfp->maxpgno + 1);
 				ret = ENOSPC;
 			} else if (!extending)
 				extending = *pgnoaddr > mfp->last_pgno;
@@ -937,8 +942,17 @@ alloc:		/* Allocate a new buffer header and data space. */
 		 * need to make copy, so we now need to allocate another buffer
 		 * to hold the new copy.
 		 */
-		if (alloc_bhp == NULL)
+		if (alloc_bhp == NULL) {
+			if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC))
+				__db_msg(env,
+	"fget makecopy txn %08x %lu/%lu going to reuse pgno %d from %lu/%lu",
+				    txn->txnid, td == NULL ? 0L :
+				    (u_long)td->read_lsn.file, td == NULL ? 0L :
+				    (u_long)td->read_lsn.offset, bhp->pgno,
+				    (u_long)VISIBLE_LSN(env, bhp)->file,
+				    (u_long)VISIBLE_LSN(env, bhp)->offset);
 			goto reuse;
+		}
 
 		DB_ASSERT(env, bhp != NULL && alloc_bhp != bhp);
 		DB_ASSERT(env, bhp->td_off == INVALID_ROFF ||
@@ -1019,6 +1033,15 @@ alloc:		/* Allocate a new buffer header and data space. */
 		F_CLR(bhp, BH_EXCLUSIVE);
 		MUTEX_UNLOCK(env, bhp->mtx_buf);
 
+		if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC))
+			__db_msg(env,
+			    "fget makecopy txn %08x %lx pgno %d from %lu/%lu",
+			    txn->txnid, (u_long)R_OFFSET(infop, bhp),
+			    bhp->pgno, bhp->td_off == INVALID_ROFF ? 0L :
+			    (u_long)VISIBLE_LSN(env, bhp)->file,
+			    bhp->td_off == INVALID_ROFF ? 0L :
+			    (u_long)VISIBLE_LSN(env, bhp)->offset);
+
 		bhp = alloc_bhp;
 		DB_ASSERT(env, BH_REFCOUNT(bhp) > 0);
 		b_incr = 1;
@@ -1164,8 +1187,15 @@ alloc:		/* Allocate a new buffer header and data space. */
 			lt = env->lk_handle;
 			locker = (DB_LOCKER *)
 			    (R_ADDR(&lt->reginfo, ip->dbth_locker));
-			DB_ASSERT(env, __db_has_pagelock(env, locker, dbmfp,
-			    (PAGE*)bhp->buf, DB_LOCK_WRITE) == 0);
+			pagelock_err = __db_has_pagelock(env, locker, dbmfp,
+			    (PAGE *)bhp->buf, DB_LOCK_WRITE);
+			if (pagelock_err != 0) {
+				if (pagelock_err == DB_RUNRECOVERY)
+					return (pagelock_err);
+				__db_syserr(env, pagelock_err,
+				    "Locker %x has no page lock for pgno %d",
+				    locker->id, ((PAGE *)bhp->buf)->pgno);
+			}
 		}
 #endif
 
@@ -1228,3 +1258,85 @@ err:	/*
 
 	return (ret);
 }
+
+/*
+ * __memp_find_obsolete_version --
+ *	Search the version chain behind vis_bhp, from oldest to youngest, for
+ *	an unreferenced buffer which is no longer BH_VISIBLE() to any existing
+ *	transaction; *foundp gets that buffer, or NULL. May advance
+ *	hp->old_reader. Fails only if __txn_get_readers() does. The hash
+ *	bucket is locked, no buffer is locked; vis_bhp has an older version.
+ *
+ * PUBLIC: int  __memp_find_obsolete_version
+ * PUBLIC:	__P((ENV *, BH *, DB_MPOOL_HASH *, BH **));
+ */
+int
+__memp_find_obsolete_version(env, vis_bhp, hp, foundp)
+	ENV *env;
+	BH *vis_bhp;
+	DB_MPOOL_HASH *hp;
+	BH **foundp;
+{
+	BH *bhp;
+	DB_LSN *readers, vlsn;
+	int n_readers, ret;
+
+	*foundp = NULL;
+	readers = NULL;
+	ret = 0;
+	bhp = SH_CHAIN_PREVP(vis_bhp, vc, __bh);
+	while (SH_CHAIN_HASPREV(bhp, vc))
+		bhp = SH_CHAIN_PREVP(bhp, vc, __bh);
+
+	/*
+	 * The least-expensive case is finding an obsolete version without
+	 * needing to build the active snapshot transaction list.
+	 */
+	if (BH_OBSOLETE(bhp, hp->old_reader, vlsn) && BH_REFCOUNT(bhp) == 0) {
+		*foundp = bhp;
+		goto out;
+	}
+
+	if ((ret = __txn_get_readers(env, &readers, &n_readers)) != 0)
+		goto out;
+
+	if (LOG_COMPARE(&readers[n_readers - 1], &hp->old_reader) > 0) {
+		hp->old_reader = readers[n_readers - 1];
+		if (BH_OBSOLETE(bhp, hp->old_reader, vlsn) &&
+		    BH_REFCOUNT(bhp) == 0) {
+			*foundp = bhp;
+			goto cleanup;
+		}
+	}
+
+	while ((bhp = SH_CHAIN_NEXT(bhp, vc, __bh)) != vis_bhp) {
+		if (BH_REFCOUNT(bhp) == 0 &&
+		    __memp_bh_unreachable(env, bhp, readers, n_readers)) {
+			*foundp = bhp;
+#ifdef DIAGNOSTIC
+			/*
+			 * Usually when the hash bucket is locked, the refcount
+			 * is incremented and the bucket unlocked before the
+			 * buffer is locked; this avoids mtx_buf deadlocks.
+			 * This unreachable version cannot be involved with any
+			 * deadlock-creating locking, though the head of the
+			 * version chain could be locked. No TRYLOCK needed.
+			 */
+			MUTEX_LOCK(env, bhp->mtx_buf);
+			F_SET(bhp, BH_UNREACHABLE);
+			MUTEX_UNLOCK(env, bhp->mtx_buf);
+#endif
+			break;
+		}
+	}
+
+cleanup:
+	if (readers != NULL)
+		__os_free(env, readers);
+out:
+	if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC) && *foundp != NULL)
+		__db_msg(env, "fget reusing %p pgno %d @%lu/%lu", bhp,
+		    bhp->pgno, (u_long)VISIBLE_LSN(env, bhp)->file,
+		    (u_long)VISIBLE_LSN(env, bhp)->offset);
+	return (ret);
+}
diff --git a/src/mp/mp_fmethod.c b/src/mp/mp_fmethod.c
index 41bd638c..4974f57c 100644
--- a/src/mp/mp_fmethod.c
+++ b/src/mp/mp_fmethod.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -315,7 +315,7 @@ __memp_set_lsn_offset(dbmfp, lsn_offset)
 
 /*
  * __memp_get_maxsize --
- *	Get the file's maximum size.
+ *	Get the file's maximum size, returning zeroes if none is set.
  */
 static int
 __memp_get_maxsize(dbmfp, gbytesp, bytesp)
@@ -334,11 +334,22 @@ __memp_get_maxsize(dbmfp, gbytesp, bytesp)
 		ENV_ENTER(env, ip);
 
 		MUTEX_LOCK(env, mfp->mutex);
-		*gbytesp = (u_int32_t)
-		    (mfp->maxpgno / (GIGABYTE / mfp->pagesize));
-		*bytesp = (u_int32_t)
-		    ((mfp->maxpgno % (GIGABYTE / mfp->pagesize)) *
-		    mfp->pagesize);
+		if (mfp->maxpgno == 0) {
+			*gbytesp = *bytesp = 0;
+		} else {
+			*gbytesp = (u_int32_t)
+			    (mfp->maxpgno / (GIGABYTE / mfp->pagesize));
+			*bytesp = (u_int32_t) (mfp->maxpgno %
+			    (GIGABYTE / mfp->pagesize) + 1) * mfp->pagesize;
+			/*
+			 * After converting from 0-based maxpgno to #pages, we
+			 * might have bumped into the next gigabyte boundary.
+			 */
+			if (*bytesp >= GIGABYTE) {
+				*bytesp -= GIGABYTE;
+				*gbytesp += 1;
+			}
+		}
 		MUTEX_UNLOCK(env, mfp->mutex);
 
 		ENV_LEAVE(env, ip);
@@ -348,8 +359,34 @@ __memp_get_maxsize(dbmfp, gbytesp, bytesp)
 }
 
 /*
+ * __memp_set_maxpgno --
+ *	Set the file's maxpgno from the configured max size. If that size is
+ *	pagesize or less then the filesize limit is disabled.
+ *
+ * PUBLIC: void __memp_set_maxpgno __P((MPOOLFILE *, u_int32_t, u_int32_t));
+ */
+void
+__memp_set_maxpgno(mfp, gbytes, bytes)
+	MPOOLFILE *mfp;
+	u_int32_t gbytes, bytes;
+{
+	if (gbytes == 0 && bytes <= mfp->pagesize)
+		mfp->maxpgno = 0;	/* Zero means no size limit. */
+	else {
+		mfp->maxpgno = (db_pgno_t)
+		    (gbytes * (GIGABYTE / mfp->pagesize));
+		/* Add the pages for bytes, rounding up a fractional page. */
+		mfp->maxpgno += (db_pgno_t)
+		    ((bytes + mfp->pagesize - 1) / mfp->pagesize);
+		/* Convert from #pages to the zero-based max pgno. */
+		mfp->maxpgno--;
+	}
+}
+
+/*
  * __memp_set_maxsize --
- *	Set the file's maximum size.
+ *	Set the file's maximum size; if the size is <= pagesize then
+ *	remove any file size limit.
  */
 static int
 __memp_set_maxsize(dbmfp, gbytes, bytes)
@@ -368,10 +405,7 @@ __memp_set_maxsize(dbmfp, gbytes, bytes)
 		ENV_ENTER(env, ip);
 
 		MUTEX_LOCK(env, mfp->mutex);
-		mfp->maxpgno = (db_pgno_t)
-		    (gbytes * (GIGABYTE / mfp->pagesize));
-		mfp->maxpgno += (db_pgno_t)
-		    ((bytes + mfp->pagesize - 1) / mfp->pagesize);
+		__memp_set_maxpgno(mfp, gbytes, bytes);
 		MUTEX_UNLOCK(env, mfp->mutex);
 
 		ENV_LEAVE(env, ip);
diff --git a/src/mp/mp_fopen.c b/src/mp/mp_fopen.c
index ef7f886a..dbe7b9c8 100644
--- a/src/mp/mp_fopen.c
+++ b/src/mp/mp_fopen.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -89,8 +89,9 @@ __memp_fopen_pp(dbmfp, path, flags, mode, pagesize)
  * Generate the number of user opens.  If there is no backing file
  * there is an extra open count to keep the in memory db around.
  */
-#define MFP_OPEN_CNT(mfp)	((mfp)->mpf_cnt - ((mfp)->neutral_cnt +	\
+#define	MFP_OPEN_CNT(mfp)	((mfp)->mpf_cnt - ((mfp)->neutral_cnt +	\
 				   (u_int32_t)(mfp)->no_backing_file))
+#define	MP_IOINFO_RETRIES	5
 /*
  * __memp_fopen --
  *	DB_MPOOLFILE->open.
@@ -118,7 +119,7 @@ __memp_fopen(dbmfp, mfp, path, dirp, flags, mode, pgsize)
 	size_t maxmap;
 	db_pgno_t last_pgno;
 	u_int32_t bucket, mbytes, bytes, oflags, pagesize;
-	int refinc, ret, isdir;
+	int isdir, refinc, ret, tries;
 	char *rpath;
 
 	/* If this handle is already open, return. */
@@ -249,7 +250,7 @@ __memp_fopen(dbmfp, mfp, path, dirp, flags, mode, pgsize)
 				if (MFP_OPEN_CNT(mfp) > 0 &&
 				     atomic_read(&mfp->multiversion) == 0) {
 mvcc_err:				__db_errx(env, DB_STR("3041",
-"DB_MULTIVERSION cannot be specified on a database file which is already open"));
+"DB_MULTIVERSION cannot be specified on a database file that is already open"));
 					ret = EINVAL;
 					goto err;
 				}
@@ -399,11 +400,44 @@ mvcc_err:				__db_errx(env, DB_STR("3041",
 			if (LF_ISSET(DB_ODDFILESIZE))
 				bytes -= (u_int32_t)(bytes % pagesize);
 			else {
-				__db_errx(env, DB_STR_A("3037",
-		    "%s: file size not a multiple of the pagesize", "%s"),
-				    rpath);
-				ret = EINVAL;
-				goto err;
+				/*
+				 * If the file size is not a multiple of the
+				 * pagesize, it is likely because the ioinfo
+				 * call is racing with a write that is extending
+				 * the file.  Many file systems will extend
+				 * in fs block size units, and if the pagesize
+				 * is larger than that, we can briefly see a
+				 * file size that is not a multiple of pagesize.
+				 *
+				 * Yield the processor to allow that to finish
+				 * and try again a few times.
+				 */
+				tries = 0;
+				STAT((mp->stat.st_oddfsize_detect++));
+				while (tries < MP_IOINFO_RETRIES) {
+					if ((ret = __os_ioinfo(env, rpath,
+					    dbmfp->fhp, &mbytes, &bytes,
+					    NULL)) != 0) {
+						__db_err(env, ret, "%s", rpath);
+						goto err;
+					}
+					if (bytes % pagesize != 0) {
+						__os_yield(env, 0, 50000);
+						tries++;
+					} else {
+					    STAT((
+					    mp->stat.st_oddfsize_resolve++));
+					    break;
+					}
+				}
+				if (tries == MP_IOINFO_RETRIES) {
+					__db_errx(env, DB_STR_A("3043",
+    "%s: file size (%lu %lu) not a multiple of the pagesize %lu",
+    "%s %lu %lu %lu"),
+    rpath, (u_long)mbytes, (u_long)bytes, (u_long)pagesize);
+					ret = EINVAL;
+					goto err;
+				}
 			}
 		}
 
@@ -786,13 +820,7 @@ __memp_mpf_alloc(dbmp, dbmfp, path, pagesize, flags, retmfp)
 	mfp->lsn_off = dbmfp->lsn_offset;
 	mfp->clear_len = dbmfp->clear_len;
 	mfp->priority = dbmfp->priority;
-	if (dbmfp->gbytes != 0 || dbmfp->bytes != 0) {
-		mfp->maxpgno = (db_pgno_t)
-		    (dbmfp->gbytes * (GIGABYTE / mfp->pagesize));
-		mfp->maxpgno += (db_pgno_t)
-		    ((dbmfp->bytes + mfp->pagesize - 1) /
-		    mfp->pagesize);
-	}
+	__memp_set_maxpgno(mfp, dbmfp->gbytes, dbmfp->bytes);
 	if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
 		mfp->no_backing_file = 1;
 	if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_UNLINK))
@@ -1019,6 +1047,7 @@ __memp_fclose(dbmfp, flags)
 					ret = t_ret;
 				__os_free(env, rpath);
 			}
+			mfp->unlink_on_close = 0;
 		}
 		if (MFP_OPEN_CNT(mfp) == 0) {
 			F_CLR(mfp, MP_NOT_DURABLE);
@@ -1068,6 +1097,7 @@ __memp_mf_discard(dbmp, mfp, hp_locked)
 	DB_MPOOL_STAT *sp;
 #endif
 	MPOOL *mp;
+	char *rpath;
 	int need_sync, ret, t_ret;
 
 	env = dbmp->env;
@@ -1095,6 +1125,23 @@ __memp_mf_discard(dbmp, mfp, hp_locked)
 	 */
 	mfp->deadfile = 1;
 
+	/* We should unlink the file if necessary. */
+	if (mfp->block_cnt == 0 && mfp->mpf_cnt == 0 && mfp->unlink_on_close &&
+	    !F_ISSET(mfp, MP_TEMP) && !mfp->no_backing_file) {
+		if ((t_ret = __db_appname(env, DB_APP_DATA,
+		    R_ADDR(dbmp->reginfo, mfp->path_off), NULL,
+		    &rpath)) != 0 && ret == 0)
+			ret = t_ret;
+		if (t_ret == 0) {
+			if ((t_ret = __os_unlink(
+			    dbmp->env, rpath, 0)) != 0 && ret == 0)
+				ret = t_ret;
+			__os_free(env, rpath);
+		}
+		mfp->unlink_on_close = 0;
+		need_sync = 0;
+	}
+
 	/* Discard the mutex we're holding and return it too the pool. */
 	MUTEX_UNLOCK(env, mfp->mutex);
 	if ((t_ret = __mutex_free(env, &mfp->mutex)) != 0 && ret == 0)
diff --git a/src/mp/mp_fput.c b/src/mp/mp_fput.c
index 7a900fd0..06b30fd4 100644
--- a/src/mp/mp_fput.c
+++ b/src/mp/mp_fput.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -52,7 +52,8 @@ __memp_fput_pp(dbmfp, pgaddr, priority, flags)
 
 /*
  * __memp_fput --
- *	DB_MPOOLFILE->put.
+ *	DB_MPOOLFILE->put. Release this reference to the page. If the reference
+ *	count drops to zero, adjust the buffer's cache priority.
  *
  * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *,
  * PUBLIC:      DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY));
diff --git a/src/mp/mp_fset.c b/src/mp/mp_fset.c
index 1129853f..770ec5c8 100644
--- a/src/mp/mp_fset.c
+++ b/src/mp/mp_fset.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/mp/mp_method.c b/src/mp/mp_method.c
index 7afae248..56d6c42b 100644
--- a/src/mp/mp_method.c
+++ b/src/mp/mp_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -67,6 +67,7 @@ __memp_get_cachesize(dbenv, gbytesp, bytesp, ncachep)
 	int *ncachep;
 {
 	DB_MPOOL *dbmp;
+	DB_THREAD_INFO *ip;
 	ENV *env;
 	MPOOL *mp;
 
@@ -78,12 +79,16 @@ __memp_get_cachesize(dbenv, gbytesp, bytesp, ncachep)
 	if (MPOOL_ON(env)) {
 		dbmp = env->mp_handle;
 		mp = dbmp->reginfo[0].primary;
+		ENV_ENTER(env, ip);
+		MUTEX_LOCK(env, mp->mtx_resize);
 		if (gbytesp != NULL)
 			*gbytesp = mp->gbytes;
 		if (bytesp != NULL)
 			*bytesp = mp->bytes;
 		if (ncachep != NULL)
 			*ncachep = (int)mp->nreg;
+		MUTEX_UNLOCK(env, mp->mtx_resize);
+		ENV_LEAVE(env, ip);
 	} else {
 		if (gbytesp != NULL)
 			*gbytesp = dbenv->mp_gbytes;
@@ -380,7 +385,7 @@ __memp_set_mp_max_write(dbenv, maxwrite, maxwrite_sleep)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->get_mp_max_write", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->set_mp_max_write", DB_INIT_MPOOL);
 
 	if (MPOOL_ON(env)) {
 		dbmp = env->mp_handle;
@@ -448,7 +453,7 @@ __memp_set_mp_mmapsize(dbenv, mp_mmapsize)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->set_mp_max_mmapsize", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->set_mp_mmapsize", DB_INIT_MPOOL);
 
 	if (MPOOL_ON(env)) {
 		dbmp = env->mp_handle;
@@ -512,7 +517,7 @@ __memp_set_mp_pagesize(dbenv, mp_pagesize)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->set_mp_pagesize", DB_INIT_MPOOL);
 	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_pagesize");
 
 	dbenv->mp_pagesize = mp_pagesize;
@@ -561,7 +566,7 @@ __memp_set_mp_tablesize(dbenv, mp_tablesize)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->set_mp_tablesize", DB_INIT_MPOOL);
 	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_tablesize");
 
 	dbenv->mp_tablesize = mp_tablesize;
@@ -583,7 +588,7 @@ __memp_get_mp_mtxcount(dbenv, mp_mtxcountp)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->get_mp_max_mtxcount", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->get_mp_mtxcount", DB_INIT_MPOOL);
 
 	if (MPOOL_ON(env)) {
 		dbmp = env->mp_handle;
@@ -610,7 +615,7 @@ __memp_set_mp_mtxcount(dbenv, mp_mtxcount)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->set_mp_mtxcount", DB_INIT_MPOOL);
 	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_mtxcount");
 
 	dbenv->mp_mtxcount = mp_mtxcount;
@@ -870,7 +875,7 @@ __memp_ftruncate(dbmfp, txn, ip, pgno, flags)
 	    !mfp->no_backing_file && pgno <= mfp->last_flushed_pgno)
 #ifdef HAVE_FTRUNCATE
 		ret = __os_truncate(env,
-		    dbmfp->fhp, pgno, mfp->pagesize);
+		    dbmfp->fhp, pgno, mfp->pagesize, 0);
 #else
 		ret = __db_zero_extend(env,
 		    dbmfp->fhp, pgno, mfp->last_pgno, mfp->pagesize);
diff --git a/src/mp/mp_mvcc.c b/src/mp/mp_mvcc.c
index 47531528..b51ae135 100644
--- a/src/mp/mp_mvcc.c
+++ b/src/mp/mp_mvcc.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -151,6 +151,11 @@ __memp_bh_freeze(dbmp, infop, hp, bhp, need_frozenp)
 	real_name = NULL;
 	fhp = NULL;
 
+	if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC))
+		__db_msg(env, "freeze %s %d @%lu/%lu", __memp_fns(dbmp, mfp),
+		    bhp->pgno, (u_long)VISIBLE_LSN(env, bhp)->file,
+		    (u_long)VISIBLE_LSN(env, bhp)->offset);
+
 	MVCC_MPROTECT(bhp->buf, pagesize, PROT_READ | PROT_WRITE);
 
 	MPOOL_REGION_LOCK(env, infop);
@@ -161,7 +166,7 @@ __memp_bh_freeze(dbmp, infop, hp, bhp, need_frozenp)
 	} else {
 		*need_frozenp = 1;
 
-		/* There might be a small amount of unallocated space. */
+		/* There might be enough space for a single-item block. */
 		if (__env_alloc(infop,
 		    sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE),
 		    &frozen_alloc) == 0) {
@@ -405,6 +410,12 @@ __memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp)
 	ret = 0;
 	real_name = NULL;
 
+	if (FLD_ISSET(env->dbenv->verbose, DB_VERB_MVCC))
+		__db_msg(env, "thaw %s %d @%lu/%lu", __memp_fns(dbmp, mfp),
+		    frozen_bhp->pgno,
+		    (u_long)VISIBLE_LSN(env, frozen_bhp)->file,
+		    (u_long)VISIBLE_LSN(env, frozen_bhp)->offset);
+
 	MUTEX_REQUIRED(env, hp->mtx_hash);
 	DB_ASSERT(env, F_ISSET(frozen_bhp, BH_EXCLUSIVE) || alloc_bhp == NULL);
 	h_locked = 1;
@@ -414,7 +425,8 @@ __memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp)
 	DB_ASSERT(env, alloc_bhp != NULL ||
 	    SH_CHAIN_SINGLETON(frozen_bhp, vc) ||
 	    (SH_CHAIN_HASNEXT(frozen_bhp, vc) &&
-	    BH_OBSOLETE(frozen_bhp, hp->old_reader, vlsn)));
+	    BH_OBSOLETE(frozen_bhp, hp->old_reader, vlsn)) ||
+	    F_ISSET(frozen_bhp, BH_UNREACHABLE));
 	DB_ASSERT(env, alloc_bhp == NULL || !F_ISSET(alloc_bhp, BH_FROZEN));
 
 	spgno = ((BH_FROZEN_PAGE *)frozen_bhp)->spgno;
@@ -516,7 +528,7 @@ __memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp)
 		else {
 			maxpgno -= (db_pgno_t)ntrunc;
 			if ((ret = __os_truncate(env, fhp,
-			    maxpgno + 1, pagesize)) != 0)
+			    maxpgno + 1, pagesize, 0)) != 0)
 				goto err;
 
 			/* Fix up the linked list */
diff --git a/src/mp/mp_region.c b/src/mp/mp_region.c
index 07134de7..ba836cf4 100644
--- a/src/mp/mp_region.c
+++ b/src/mp/mp_region.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -11,7 +11,7 @@
 #include "db_int.h"
 #include "dbinc/mp.h"
 
-static int	__memp_init_config __P((ENV *, MPOOL *));
+static int	__memp_init_config __P((ENV *, MPOOL *, int));
 static void	__memp_region_size __P((ENV *, roff_t *, u_int32_t *));
 
 #define	MPOOL_DEFAULT_PAGESIZE	(4 * 1024)
@@ -34,7 +34,7 @@ __memp_open(env, create_ok)
 	roff_t cache_size, max_size, reg_size;
 	u_int i, max_nreg;
 	u_int32_t htab_buckets, *regids;
-	int ret;
+	int create, ret;
 
 	dbenv = env->dbenv;
 	cache_size = 0;
@@ -77,7 +77,8 @@ __memp_open(env, create_ok)
 	 * If we created the region, initialize it.  Create or join any
 	 * additional regions.
 	 */
-	if (F_ISSET(&reginfo, REGION_CREATE)) {
+	create = F_ISSET(&reginfo, REGION_CREATE);
+	if (create) {
 		/*
 		 * We define how many regions there are going to be, allocate
 		 * the REGINFO structures and create them.  Make sure we don't
@@ -167,23 +168,38 @@ __memp_open(env, create_ok)
 	env->mp_handle = dbmp;
 
 	/* A process joining the region may reset the mpool configuration. */
-	if ((ret = __memp_init_config(env, mp)) != 0)
+	if ((ret = __memp_init_config(env, mp, create)) != 0)
 		return (ret);
 
 	return (0);
 
-err:	env->mp_handle = NULL;
-	if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) {
-		for (i = 0; i < dbenv->mp_ncache; ++i)
+err:	(void)__mutex_free(env, &dbmp->mutex);
+	(void)__memp_region_detach(env, dbmp);
+	return (ret);
+}
+
+/* __memp_region_detach --
+ *	Detach from any attached mempool regions, then clear env->mp_handle.
+ *
+ * PUBLIC: int __memp_region_detach __P((ENV *, DB_MPOOL *));
+ */
+int
+__memp_region_detach(env, dbmp)
+	ENV *env;
+	DB_MPOOL *dbmp;
+{
+	u_int i;
+
+	if (dbmp != NULL &&
+	    dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) {
+		for (i = 0; i < env->dbenv->mp_ncache; ++i)
 			if (dbmp->reginfo[i].id != INVALID_REGION_ID)
 				(void)__env_region_detach(
 				    env, &dbmp->reginfo[i], 0);
 		__os_free(env, dbmp->reginfo);
 	}
-
-	(void)__mutex_free(env, &dbmp->mutex);
-	__os_free(env, dbmp);
-	return (ret);
+	env->mp_handle = NULL;	/* Cleared even when dbmp is NULL. */
+	return (0);
 }
 
 /*
@@ -207,7 +223,7 @@ __memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg)
 	MPOOL *mp, *main_mp;
 	REGINFO *infop;
 	db_mutex_t mtx_base, mtx_discard, mtx_prev;
-	u_int32_t i;
+	u_int32_t i, mp_mtxcount;
 	int ret;
 	void *p;
 
@@ -224,6 +240,23 @@ __memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg)
 	    __mutex_alloc(env, MTX_MPOOL_REGION, 0, &mp->mtx_region)) != 0)
 		return (ret);
 
+	/*
+	 * Initializing the first mpool region allocates the mpool region id
+	 * array, file table and, if not ENV_PRIVATE, all the cache regions'
+	 * hash bucket mutexes in a single contiguous block of mutex ids, which
+	 * remain allocated when the cache is resized. The block is 'known' to
+	 * start with the first id (mtx_base), and to end #regions * mp_mtxcount
+	 * later. In private environments, mutex ids are not smallish integers,
+	 * but __env_alloc()'d pointers. Since a range of (base, count) doesn't
+	 * work for these likely-scattered mutexes, we allocate private threaded
+	 * mutexes as they are needed. Private non-threaded caches don't need
+	 * any mutexes at all.
+	 */
+	if ((mp_mtxcount = dbenv->mp_mtxcount) == 0)
+		mp_mtxcount = dbenv->mp_mtxcount = htab_buckets;
+	if (!MUTEX_ON(env) ||
+	    F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE)
+		mp_mtxcount = dbenv->mp_mtxcount = 0;
 	if (reginfo_off == 0) {
 		ZERO_LSN(mp->lsn);
 
@@ -248,15 +281,10 @@ __memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg)
 			atomic_init(&htab[i].hash_page_dirty, 0);
 		}
 
-		/*
-		 * Allocate all of the hash bucket mutexes up front.  We do
-		 * this so that we don't need to free and reallocate mutexes as
-		 * the cache is resized.
-		 */
 		mtx_base = mtx_prev = MUTEX_INVALID;
-		if (!MUTEX_ON(env) || F_ISSET(env, ENV_PRIVATE))
+		if (F_ISSET(env, ENV_PRIVATE))
 			goto no_prealloc;
-		for (i = 0; i < mp->max_nreg * dbenv->mp_mtxcount; i++) {
+		for (i = 0; i < mp->max_nreg * mp_mtxcount; i++) {
 			if ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET,
 			    DB_MUTEX_SHARED, &mtx_discard)) != 0)
 				return (ret);
@@ -274,13 +302,12 @@ __memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg)
 	}
 
 	/*
-	 * We preallocated all of the mutexes in a block, so for regions after
-	 * the first, we skip mutexes in use in earlier regions.  Each region
-	 * has the same number of buckets
+	 * If we preallocated all the mutexes, then in regions after the first,
+	 * we skip mutexes in use in earlier regions. Each region has the same
+	 * number of buckets.
 	 */
 no_prealloc:
-	if (MUTEX_ON(env))
-		mtx_base += reginfo_off * dbenv->mp_mtxcount;
+	mtx_base += reginfo_off * mp_mtxcount;
 
 	/* Allocate hash table space and initialize it. */
 	if ((ret = __env_alloc(infop,
@@ -289,18 +316,21 @@ no_prealloc:
 	mp->htab = R_OFFSET(infop, htab);
 	for (i = 0; i < htab_buckets; i++) {
 		hp = &htab[i];
-		if (!MUTEX_ON(env) || dbenv->mp_mtxcount == 0)
+		/*
+		 * Set mtx_hash to do no locking, or share a mutex with an
+		 * earlier hash bucket in this region, or assign it from the
+		 * block of mutexes allocated above, or (in a private
+		 * environment) allocate a new mutex.
+		 */
+		if (mp_mtxcount == 0)
 			hp->mtx_hash = MUTEX_INVALID;
-		else if (F_ISSET(env, ENV_PRIVATE)) {
-			if (i >= dbenv->mp_mtxcount)
-				hp->mtx_hash =
-				    htab[i % dbenv->mp_mtxcount].mtx_hash;
-			else if
-			    ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET,
-			    DB_MUTEX_SHARED, &hp->mtx_hash)) != 0)
-				return (ret);
-		} else
-			hp->mtx_hash = mtx_base + (i % dbenv->mp_mtxcount);
+		else if (i >= mp_mtxcount)
+			hp->mtx_hash = htab[i % mp_mtxcount].mtx_hash;
+		else if (!F_ISSET(env, ENV_PRIVATE))
+			hp->mtx_hash = mtx_base + i;
+		else if ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET,
+		    DB_MUTEX_SHARED, &hp->mtx_hash)) != 0)
+			return (ret);
 		SH_TAILQ_INIT(&hp->hash_bucket);
 		atomic_init(&hp->hash_page_dirty, 0);
 #ifdef HAVE_STATISTICS
@@ -311,7 +341,7 @@ no_prealloc:
 		ZERO_LSN(hp->old_reader);
 	}
 	mp->htab_buckets = htab_buckets;
-	mp->htab_mutexes = dbenv->mp_mtxcount;
+	mp->htab_mutexes = mp_mtxcount;
 	mp->pagesize = dbenv->mp_pagesize == 0 ?
 		MPOOL_DEFAULT_PAGESIZE : dbenv->mp_pagesize;
 
@@ -443,11 +473,21 @@ __memp_region_mutex_count(env)
 	dbenv = env->dbenv;
 
 	__memp_region_size(env, &reg_size, &htab_buckets);
-	if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION))
-		pgsize = sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE);
-	if ((pgsize = dbenv->mp_pagesize) == 0)
-		pgsize = MPOOL_DEFAULT_PAGESIZE;
+	if (dbenv->mp_mtxcount != 0)
+		htab_buckets = dbenv->mp_mtxcount;
 	max_region = __memp_max_regions(env);
+	if ((pgsize = dbenv->mp_pagesize) == 0) {
+		/*
+		 * If MVCC is on during environment creation, provide enough
+		 * mutexes so that half the cache can be frozen buffer headers.
+		 */
+		if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION))
+			pgsize = (MPOOL_DEFAULT_PAGESIZE +
+			    sizeof(BH_FROZEN_ALLOC) +
+			    sizeof(BH_FROZEN_PAGE)) / 2;
+		else
+			pgsize = MPOOL_DEFAULT_PAGESIZE;
+	}
 
 	/*
 	 * We need a couple of mutexes for the region itself, one for each
@@ -456,10 +496,6 @@ __memp_region_mutex_count(env)
 	 * hash bucket. We then need one mutex per page in the cache,
 	 * the worst case is really big if the pages are 512 bytes.
 	 */
-	if (dbenv->mp_mtxcount != 0)
-		htab_buckets = dbenv->mp_mtxcount;
-	else
-		dbenv->mp_mtxcount = htab_buckets;
 	num_per_cache = htab_buckets + (u_int32_t)(reg_size / pgsize);
 	return ((max_region * num_per_cache) + 50 + MPOOL_FILE_BUCKETS);
 }
@@ -469,23 +505,39 @@ __memp_region_mutex_count(env)
  *	Initialize shared configuration information.
  */
 static int
-__memp_init_config(env, mp)
+__memp_init_config(env, mp, create)
 	ENV *env;
 	MPOOL *mp;
+	int create;
 {
 	DB_ENV *dbenv;
 
 	dbenv = env->dbenv;
 
 	MPOOL_SYSTEM_LOCK(env);
-	if (dbenv->mp_mmapsize != 0)
+	if (create) {
 		mp->mp_mmapsize = (db_size_t)dbenv->mp_mmapsize;
-	if (dbenv->mp_maxopenfd != 0)
 		mp->mp_maxopenfd = dbenv->mp_maxopenfd;
-	if (dbenv->mp_maxwrite != 0)
 		mp->mp_maxwrite = dbenv->mp_maxwrite;
-	if (dbenv->mp_maxwrite_sleep != 0)
 		mp->mp_maxwrite_sleep = dbenv->mp_maxwrite_sleep;
+	} else {
+		if (dbenv->mp_mmapsize != 0 &&
+		    mp->mp_mmapsize != (db_size_t)dbenv->mp_mmapsize)
+			__db_msg(env, DB_STR("3044",
+"Warning: Ignoring maximum memory map size when joining environment"));
+
+		if (dbenv->mp_maxopenfd != 0 &&
+		    mp->mp_maxopenfd != dbenv->mp_maxopenfd)
+			__db_msg(env, DB_STR("3045",
+"Warning: Ignoring max open file descriptors value when joining environment"));
+
+		if ((dbenv->mp_maxwrite != 0 &&
+		    mp->mp_maxwrite != dbenv->mp_maxwrite) ||
+		    (dbenv->mp_maxwrite_sleep != 0 &&
+		    mp->mp_maxwrite_sleep != dbenv->mp_maxwrite_sleep))
+			__db_msg(env, DB_STR("3046",
+"Warning: Ignoring maximum sequential writes value when joining environment"));
+	}
 	MPOOL_SYSTEM_UNLOCK(env);
 
 	return (0);
@@ -501,22 +553,18 @@ int
 __memp_env_refresh(env)
 	ENV *env;
 {
-	BH *bhp;
-	BH_FROZEN_ALLOC *frozen_alloc;
 	DB_MPOOL *dbmp;
 	DB_MPOOLFILE *dbmfp;
-	DB_MPOOL_HASH *hp;
 	DB_MPREG *mpreg;
 	MPOOL *mp, *c_mp;
 	REGINFO *infop;
-	u_int32_t bucket, i, nreg;
+	u_int32_t i, nreg;
 	int ret, t_ret;
 
 	ret = 0;
 	dbmp = env->mp_handle;
 	mp = dbmp->reginfo[0].primary;
 	nreg = mp->nreg;
-	hp = R_ADDR(&dbmp->reginfo[0], mp->htab);
 
 	/*
 	 * If a private region, return the memory to the heap.  Not needed for
@@ -526,49 +574,20 @@ __memp_env_refresh(env)
 	if (!F_ISSET(env, ENV_PRIVATE))
 		goto not_priv;
 
-	/* Discard buffers. */
 	for (i = 0; i < nreg; ++i) {
 		infop = &dbmp->reginfo[i];
-		c_mp = infop->primary;
-		for (hp = R_ADDR(infop, c_mp->htab), bucket = 0;
-		    bucket < c_mp->htab_buckets; ++hp, ++bucket) {
-			while ((bhp = SH_TAILQ_FIRST(
-			    &hp->hash_bucket, __bh)) != NULL)
-				if (F_ISSET(bhp, BH_FROZEN))
-					SH_TAILQ_REMOVE(
-					    &hp->hash_bucket, bhp,
-					    hq, __bh);
-				else {
-					if (F_ISSET(bhp, BH_DIRTY)) {
-						atomic_dec(env,
-						     &hp->hash_page_dirty);
-						F_CLR(bhp,
-						    BH_DIRTY | BH_DIRTY_CREATE);
-					}
-					atomic_inc(env, &bhp->ref);
-					if ((t_ret = __memp_bhfree(dbmp, infop,
-					    R_ADDR(dbmp->reginfo,
-					    bhp->mf_offset), hp, bhp,
-					    BH_FREE_FREEMEM |
-					    BH_FREE_UNLOCKED)) != 0 && ret == 0)
-						ret = t_ret;
-				}
-		}
-		MPOOL_REGION_LOCK(env, infop);
-		while ((frozen_alloc = SH_TAILQ_FIRST(
-		    &c_mp->alloc_frozen, __bh_frozen_a)) != NULL) {
-			SH_TAILQ_REMOVE(&c_mp->alloc_frozen, frozen_alloc,
-			    links, __bh_frozen_a);
-			__env_alloc_free(infop, frozen_alloc);
-		}
-		MPOOL_REGION_UNLOCK(env, infop);
+		if ((t_ret = __memp_region_bhfree(infop)) != 0 && ret == 0)
+			ret = t_ret;
 	}
 
 not_priv:
 	/* Discard DB_MPOOLFILEs. */
 	while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL)
-		if ((t_ret = __memp_fclose(dbmfp, DB_FLUSH)) != 0 && ret == 0)
-			ret = t_ret;
+		if ((t_ret = __memp_fclose(dbmfp, DB_FLUSH)) != 0) {
+			if (ret == 0)
+				ret = t_ret;
+			break;
+		}
 
 	/* Discard DB_MPREGs. */
 	if (dbmp->pg_inout != NULL)
@@ -618,3 +637,62 @@ not_priv:
 	env->mp_handle = NULL;
 	return (ret);
 }
+
+/*
+ * __memp_region_bhfree --
+ *	Discard the buffers for a region.
+ *
+ * PUBLIC: int __memp_region_bhfree __P((REGINFO *));
+ */
+int
+__memp_region_bhfree(infop)
+	REGINFO *infop;
+{
+	BH *bhp;
+	BH_FROZEN_ALLOC *frozen_alloc;
+	DB_MPOOL *dbmp;
+	DB_MPOOL_HASH *hp;
+	ENV *env;
+	MPOOL *c_mp;
+	u_int32_t bucket;
+	int ret, t_ret;
+
+	env = infop->env;
+	dbmp = env->mp_handle;
+	ret = 0;
+
+	/* Discard buffers. */
+	c_mp = infop->primary;
+	for (hp = R_ADDR(infop, c_mp->htab), bucket = 0;
+	    bucket < c_mp->htab_buckets; ++hp, ++bucket) {
+		while ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) != NULL)
+			if (F_ISSET(bhp, BH_FROZEN))
+				SH_TAILQ_REMOVE(&hp->hash_bucket,
+				    bhp, hq, __bh);
+			else {
+				if (F_ISSET(bhp, BH_DIRTY)) {
+					atomic_dec(env, &hp->hash_page_dirty);
+					F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
+				}
+				atomic_inc(env, &bhp->ref);
+				if ((t_ret = __memp_bhfree(dbmp, infop,
+				    R_ADDR(dbmp->reginfo, bhp->mf_offset),
+				    hp, bhp, BH_FREE_FREEMEM |
+				    BH_FREE_UNLOCKED)) != 0) {
+				    	if (ret == 0)
+						ret = t_ret;
+					break;
+				}
+			}
+	}
+	MPOOL_REGION_LOCK(env, infop);
+	while ((frozen_alloc = SH_TAILQ_FIRST(
+	    &c_mp->alloc_frozen, __bh_frozen_a)) != NULL) {
+		SH_TAILQ_REMOVE(&c_mp->alloc_frozen,
+		    frozen_alloc, links, __bh_frozen_a);
+		__env_alloc_free(infop, frozen_alloc);
+	}
+	MPOOL_REGION_UNLOCK(env, infop);
+
+	return (ret);
+}
diff --git a/src/mp/mp_register.c b/src/mp/mp_register.c
index dc7015a7..cc59af9c 100644
--- a/src/mp/mp_register.c
+++ b/src/mp/mp_register.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/mp/mp_resize.c b/src/mp/mp_resize.c
index 97719554..932a1baa 100644
--- a/src/mp/mp_resize.c
+++ b/src/mp/mp_resize.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -126,12 +126,13 @@ __memp_merge_buckets(dbmp, new_nbuckets, old_bucket, new_bucket)
 	MPOOLFILE *mfp;
 	REGINFO *new_infop, *old_infop;
 	u_int32_t bucket, high_mask, new_region, old_region;
-	int ret;
+	int expanding, ret;
 
 	env = dbmp->env;
 	mp = dbmp->reginfo[0].primary;
 	new_bhp = NULL;
 	ret = 0;
+	expanding = (mp->nbuckets > new_nbuckets) ? 0 : 1;
 
 	MP_MASK(new_nbuckets, high_mask);
 
@@ -150,36 +151,42 @@ __memp_merge_buckets(dbmp, new_nbuckets, old_bucket, new_bucket)
 	/*
 	 * Before merging, we need to check that there are no old buffers left
 	 * in the target hash bucket after a previous split.
+	 * Only free the buffers if we are expanding into new buckets. If
+	 * we are contracting, the buffers in the original (old) bucket should
+	 * not be freed.
 	 */
 free_old:
-	MUTEX_LOCK(env, new_hp->mtx_hash);
-	SH_TAILQ_FOREACH(bhp, &new_hp->hash_bucket, hq, __bh) {
-		MP_BUCKET(bhp->mf_offset, bhp->pgno, mp->nbuckets, bucket);
+	if (expanding != 0) {
+		MUTEX_LOCK(env, new_hp->mtx_hash);
+		SH_TAILQ_FOREACH(bhp, &new_hp->hash_bucket, hq, __bh) {
+			MP_BUCKET(
+			    bhp->mf_offset, bhp->pgno, mp->nbuckets, bucket);
+
+			if (bucket != new_bucket) {
+				/*
+				 * There is no way that an old buffer can be
+				 * locked after a split, since everyone will
+				 *  look for it in the new hash bucket.
+				 */
+				DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY) &&
+				    atomic_read(&bhp->ref) == 0);
+				atomic_inc(env, &bhp->ref);
+				mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
+				if ((ret = __memp_bhfree(dbmp, new_infop,
+				    mfp, new_hp, bhp, BH_FREE_FREEMEM)) != 0) {
+					MUTEX_UNLOCK(env, new_hp->mtx_hash);
+					return (ret);
+				}
 
-		if (bucket != new_bucket) {
-			/*
-			 * There is no way that an old buffer can be locked
-			 * after a split, since everyone will look for it in
-			 * the new hash bucket.
-			 */
-			DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY) &&
-			    atomic_read(&bhp->ref) == 0);
-			atomic_inc(env, &bhp->ref);
-			mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
-			if ((ret = __memp_bhfree(dbmp, new_infop,
-			    mfp, new_hp, bhp, BH_FREE_FREEMEM)) != 0) {
-				MUTEX_UNLOCK(env, new_hp->mtx_hash);
-				return (ret);
+				/*
+				 * The free has modified the list of buffers and
+				 * dropped the mutex.  We need to start again.
+				 */
+				goto free_old;
 			}
-
-			/*
-			 * The free has modified the list of buffers and
-			 * dropped the mutex.  We need to start again.
-			 */
-			goto free_old;
 		}
+		MUTEX_UNLOCK(env, new_hp->mtx_hash);
 	}
-	MUTEX_UNLOCK(env, new_hp->mtx_hash);
 
 	/*
 	 * Before we begin, make sure that all of the buffers we care about are
@@ -305,7 +312,9 @@ err:			atomic_dec(env, &bhp->ref);
 				    next_bhp, alloc_bhp, vc, __bh);
 		}
 
-		DB_ASSERT(env, new_hp->mtx_hash != old_hp->mtx_hash);
+		/* The mutexes must be different, unless they aren't in use. */
+		DB_ASSERT(env, new_hp->mtx_hash != old_hp->mtx_hash ||
+		    new_hp->mtx_hash == MUTEX_INVALID);
 		MUTEX_LOCK(env, new_hp->mtx_hash);
 		SH_TAILQ_INSERT_TAIL(&new_hp->hash_bucket, new_bhp, hq);
 		if (F_ISSET(new_bhp, BH_DIRTY))
@@ -362,16 +371,15 @@ __memp_add_region(dbmp)
 	MPOOL *mp;
 	REGINFO *infop;
 	int ret;
-	roff_t cache_size, reg_size;
+	roff_t reg_size;
 	u_int i;
 	u_int32_t *regids;
 
 	env = dbmp->env;
 	mp = dbmp->reginfo[0].primary;
-	cache_size = (roff_t)mp->gbytes * GIGABYTE + mp->bytes;
 
 	/* All cache regions are the same size. */
-	reg_size = dbmp->reginfo[0].rp->size;
+	reg_size = dbmp->reginfo[0].rp->max;
 	ret = 0;
 
 	infop = &dbmp->reginfo[mp->nreg];
@@ -384,9 +392,6 @@ __memp_add_region(dbmp)
 	if ((ret = __memp_init(env,
 	    dbmp, mp->nreg, mp->htab_buckets, mp->max_nreg)) != 0)
 		return (ret);
-	cache_size += reg_size;
-	mp->gbytes = (u_int32_t)(cache_size / GIGABYTE);
-	mp->bytes = (u_int32_t)(cache_size % GIGABYTE);
 	regids = R_ADDR(dbmp->reginfo, mp->regids);
 	regids[mp->nreg++] = infop->id;
 
@@ -425,16 +430,13 @@ __memp_remove_region(dbmp)
 {
 	DB_MPOOL_HASH *hp;
 	ENV *env;
-	MPOOL *mp;
+	MPOOL *mp, *c_mp;
 	REGINFO *infop;
 	int ret;
-	roff_t cache_size, reg_size;
 	u_int i;
 
 	env = dbmp->env;
 	mp = dbmp->reginfo[0].primary;
-	reg_size = dbmp->reginfo[0].rp->size;
-	cache_size = (roff_t)mp->gbytes * GIGABYTE + mp->bytes;
 	ret = 0;
 
 	if (mp->nreg == 1) {
@@ -448,21 +450,36 @@ __memp_remove_region(dbmp)
 			return (ret);
 
 	/* Detach from the region then destroy it. */
-	infop = &dbmp->reginfo[mp->nreg];
+	infop = &dbmp->reginfo[mp->nreg - 1];
+	c_mp = infop->primary;
+	hp = R_ADDR(infop, c_mp->htab);
+	/*
+	 * For a private environment, we need to free everything, and
+	 * for a non-private environment, we need to refresh the mutexes
+	 * so that they are in a ready state for a later resize.
+	 */
 	if (F_ISSET(env, ENV_PRIVATE)) {
-		hp = R_ADDR(infop, ((MPOOL*)infop->primary)->htab);
-		for (i = 0; i < env->dbenv->mp_mtxcount; i++)
-			if ((ret = __mutex_free(env, &hp[i].mtx_hash)) != 0)
+		if ((ret = __memp_region_bhfree(infop)) != 0)
+			return (ret);
+		if (MUTEX_ON(env)) {
+			DB_ASSERT(env,
+			    env->dbenv->mp_mtxcount == mp->htab_mutexes);
+			for (i = 0; i < mp->htab_mutexes; i++)
+				if ((ret = __mutex_free(env,
+				    &hp[i].mtx_hash)) != 0)
+					return (ret);
+		}
+		__env_alloc_free(infop, hp);
+	} else if (MUTEX_ON(env)) {
+		DB_ASSERT(env, env->dbenv->mp_mtxcount == mp->htab_mutexes);
+		for (i = 0; i < mp->htab_mutexes; i++)
+			if ((ret = __mutex_refresh(env, hp[i].mtx_hash)) != 0)
 				return (ret);
 	}
 
 	ret = __env_region_detach(env, infop, 1);
-	if  (ret == 0) {
+	if  (ret == 0)
 		mp->nreg--;
-		cache_size -= reg_size;
-		mp->gbytes = (u_int32_t)(cache_size / GIGABYTE);
-		mp->bytes = (u_int32_t)(cache_size % GIGABYTE);
-	}
 
 	return (ret);
 }
@@ -511,6 +528,9 @@ __memp_map_regions(dbmp)
 }
 
 /*
+ * __memp_resize --
+ *      Change the overall cache size by adding or removing cache regions.
+ *
  * PUBLIC: int __memp_resize __P((DB_MPOOL *, u_int32_t, u_int32_t));
  */
 int
@@ -526,7 +546,7 @@ __memp_resize(dbmp, gbytes, bytes)
 
 	env = dbmp->env;
 	mp = dbmp->reginfo[0].primary;
-	reg_size = dbmp->reginfo[0].rp->size;
+	reg_size = dbmp->reginfo[0].rp->max;
 	total_size = (roff_t)gbytes * GIGABYTE + bytes;
 	ncache = (u_int32_t)((total_size + reg_size / 2) / reg_size);
 
@@ -546,6 +566,9 @@ __memp_resize(dbmp, gbytes, bytes)
 		    __memp_add_region(dbmp) :
 		    __memp_remove_region(dbmp))) != 0)
 			break;
+	total_size = reg_size * (roff_t)mp->nreg;
+	mp->gbytes = (u_int32_t)(total_size / GIGABYTE);
+	mp->bytes = (u_int32_t)(total_size % GIGABYTE);
 	MUTEX_UNLOCK(env, mp->mtx_resize);
 
 	return (ret);
@@ -567,13 +590,13 @@ __memp_get_cache_max(dbenv, max_gbytesp, max_bytesp)
 	env = dbenv->env;
 
 	ENV_NOT_CONFIGURED(env,
-	    env->mp_handle, "DB_ENV->get_mp_max_ncache", DB_INIT_MPOOL);
+	    env->mp_handle, "DB_ENV->get_cache_max", DB_INIT_MPOOL);
 
 	if (MPOOL_ON(env)) {
 		/* Cannot be set after open, no lock required to read. */
 		dbmp = env->mp_handle;
 		mp = dbmp->reginfo[0].primary;
-		reg_size = dbmp->reginfo[0].rp->size;
+		reg_size = dbmp->reginfo[0].rp->max;
 		max_size = mp->max_nreg * reg_size;
 		*max_gbytesp = (u_int32_t)(max_size / GIGABYTE);
 		*max_bytesp = (u_int32_t)(max_size % GIGABYTE);
diff --git a/src/mp/mp_stat.c b/src/mp/mp_stat.c
index 246b44d7..81ea35c1 100644
--- a/src/mp/mp_stat.c
+++ b/src/mp/mp_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -133,7 +133,14 @@ __memp_stat(env, gspp, fspp, flags)
 			sp->st_ro_evict += c_mp->stat.st_ro_evict;
 			sp->st_rw_evict += c_mp->stat.st_rw_evict;
 			sp->st_page_trickle += c_mp->stat.st_page_trickle;
+			sp->st_mvcc_reused += c_mp->stat.st_mvcc_reused;
 			sp->st_pages += c_mp->pages;
+			/* Undocumented field used by tests only. */
+			sp->st_oddfsize_detect +=
+			    c_mp->stat.st_oddfsize_detect;
+			/* Undocumented field used by tests only. */
+			sp->st_oddfsize_resolve +=
+			    c_mp->stat.st_oddfsize_resolve;
 			/*
 			 * st_page_dirty	calculated by __memp_stat_hash
 			 * st_page_clean	calculated here
@@ -195,7 +202,12 @@ __memp_stat(env, gspp, fspp, flags)
 
 		/* Count the MPOOLFILE structures. */
 		i = 0;
-		len = 0;
+		/*
+		 * Allow space for the first __memp_get_files() to align the
+		 * structure array to uintmax_t, DB_MPOOL_STAT's most
+		 * restrictive field.  [#23150]
+		 */
+		len = sizeof(uintmax_t);
 		if ((ret = __memp_walk_files(env,
 		     mp, __memp_count_files, &len, &i, flags)) != 0)
 			return (ret);
@@ -252,6 +264,11 @@ __memp_file_stats(env, mfp, argp, countp, flags)
 	return (0);
 }
 
+/*
+ * __memp_count_files --
+ *	This __memp_walk_files() iterator counts the number of files as well as
+ *	the space needed for their statistics, including file names.
+ */
 static int
 __memp_count_files(env, mfp, argp, countp, flags)
 	ENV *env;
@@ -277,13 +294,25 @@ __memp_count_files(env, mfp, argp, countp, flags)
 
 /*
  * __memp_get_files --
- *	get file specific statistics
+ *	get another file's specific statistics
  *
- * Build each individual entry.  We assume that an array of pointers are
- * aligned correctly to be followed by an array of structures, which should
- * be safe (in this particular case, the first element of the structure
- * is a pointer, so we're doubly safe).  The array is followed by space
- * for the text file names.
+ * Add a file statistics entry to the current list. The chunk of memory
+ * starts with an array of DB_MPOOL_FSTAT pointers, a null pointer to mark
+ * the last one, then an aligned array of DB_MPOOL_FSTAT structures, then
+ * character space for the file names.
+ *	+-----------------------------------------------+
+ *	| count * DB_MPOOL_FSTAT pointers		|
+ *	+-----------------------------------------------+
+ *	| null pointer					|
+ *	+-----------------------------------------------+
+ *	| [space for aligning DB_MPOOL_FSTAT array]	|
+ *	+-----------------------------------------------+
+ *	| count * DB_MPOOL_FSTAT structs		|
+ *	+-----------------------------------------------+
+ *	| first file name | second file name | third... |
+ *	+-----------------------------------------------+
+ *	| file name | ...				|
+ *	+-----------------------------------------------+
  */
 static int
 __memp_get_files(env, mfp, argp, countp, flags)
@@ -305,11 +334,21 @@ __memp_get_files(env, mfp, argp, countp, flags)
 	tfsp = *(DB_MPOOL_FSTAT ***)argp;
 
 	if (*tfsp == NULL) {
-		/* Add 1 to count because we need to skip over the NULL. */
-		tstruct = (DB_MPOOL_FSTAT *)(tfsp + *countp + 1);
-		tname = (char *)(tstruct + *countp);
+		/*
+		 * Add 1 to count to skip over the NULL end marker.
+		 * Align it further for DB_MPOOL_STAT's most restrictive field
+		 * because uintmax_t might require stricter alignment than
+		 * pointers; e.g., IP32 LL64 SPARC. [#23150]
+		 */
+		tstruct = (DB_MPOOL_FSTAT *)&tfsp[*countp + 1];
+		tstruct = ALIGNP_INC(tstruct, sizeof(uintmax_t));
+		tname = (char *)&tstruct[*countp];
 		*tfsp = tstruct;
 	} else {
+		/*
+		 * This stat struct follows the previous one; the file name
+		 * follows the previous entry's filename.
+		 */
 		tstruct = *tfsp + 1;
 		tname = (*tfsp)->file_name + strlen((*tfsp)->file_name) + 1;
 		*++tfsp = tstruct;
@@ -486,6 +525,8 @@ __memp_print_stats(env, flags)
 	    (u_long)gsp->st_mvcc_thawed);
 	__db_dl(env, "The number of frozen buffers freed",
 	    (u_long)gsp->st_mvcc_freed);
+	__db_dl(env, "The number of outdated intermediate versions reused",
+	    (u_long)gsp->st_mvcc_reused);
 	__db_dl(env, "The number of page allocations", (u_long)gsp->st_alloc);
 	__db_dl(env,
 	    "The number of hash buckets examined during allocations",
@@ -744,11 +785,18 @@ __memp_print_hash(env, dbmp, reginfo, fmap, flags)
 			    vbhp != NULL;
 			    vbhp = SH_CHAIN_PREV(vbhp, vc, __bh)) {
 				__memp_print_bh(env, dbmp,
-				    " next:\t", vbhp, fmap);
+				    " prev:\t", vbhp, fmap);
 			}
 		}
 		MUTEX_UNLOCK(env, hp->mtx_hash);
 	}
+#ifdef DIAGNOSTIC
+	SH_TAILQ_FOREACH(bhp, &c_mp->free_frozen, hq, __bh) {
+		__db_msg(env, "free frozen %lu pgno %lu mtx_buf %lu",
+		    (u_long)R_OFFSET(dbmp->reginfo, bhp),
+		    (u_long)bhp->pgno, (u_long)bhp->mtx_buf);
+	}
+#endif
 
 	return (0);
 }
@@ -775,6 +823,7 @@ __memp_print_bh(env, dbmp, prefix, bhp, fmap)
 		{ BH_FROZEN,		"frozen" },
 		{ BH_TRASH,		"trash" },
 		{ BH_THAWED,		"thawed" },
+		{ BH_UNREACHABLE,	"unreachable" },
 		{ 0,			NULL }
 	};
 	DB_MSGBUF mb;
diff --git a/src/mp/mp_sync.c b/src/mp/mp_sync.c
index fa06b1d4..82d5c8de 100644
--- a/src/mp/mp_sync.c
+++ b/src/mp/mp_sync.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -95,9 +95,11 @@ __memp_discard_all_mpfs (env, mp)
 		while ((mfp = SH_TAILQ_FIRST(
 		    &hp->hash_bucket, __mpoolfile)) != NULL) {
 			MUTEX_LOCK(env, mfp->mutex);
-			if ((t_ret = __memp_mf_discard(dbmp, mfp, 1)) != 0 &&
-			    ret == 0)
-				ret = t_ret;
+			if ((t_ret = __memp_mf_discard(dbmp, mfp, 1)) != 0) {
+				if (ret == 0)
+					ret = t_ret;
+				break;
+			}
 		}
 		MUTEX_UNLOCK(env, hp->mtx_hash);
 	}
@@ -837,6 +839,7 @@ __memp_mf_sync(dbmp, mfp, locked)
 	MPOOLFILE *mfp;
 	int locked;
 {
+	APPNAME appname;
 	DB_FH *fhp;
 	DB_MPOOL_HASH *hp;
 	ENV *env;
@@ -846,6 +849,7 @@ __memp_mf_sync(dbmp, mfp, locked)
 
 	COMPQUIET(hp, NULL);
 	env = dbmp->env;
+	appname = DB_APP_DATA;
 
 	/*
 	 * We need to be holding the hash lock: we're using the path name
@@ -859,13 +863,20 @@ __memp_mf_sync(dbmp, mfp, locked)
 		MUTEX_LOCK(env, hp->mtx_hash);
 	}
 
-	if ((ret = __db_appname(env, DB_APP_DATA,
+mpsync:	if ((ret = __db_appname(env, appname,
 	    R_ADDR(dbmp->reginfo, mfp->path_off), NULL, &rpath)) == 0) {
 		if ((ret = __os_open(env, rpath, 0, 0, 0, &fhp)) == 0) {
 			ret = __os_fsync(env, fhp);
 			if ((t_ret =
 			    __os_closehandle(env, fhp)) != 0 && ret == 0)
 				ret = t_ret;
+		} else {
+			/* We may be syncing the blob meta db. */
+			if (appname != DB_APP_BLOB) {
+				__os_free(env, rpath);
+				appname = DB_APP_BLOB;
+				goto mpsync;
+			}
 		}
 		__os_free(env, rpath);
 	}
diff --git a/src/mp/mp_trickle.c b/src/mp/mp_trickle.c
index fba528b3..ff8cb875 100644
--- a/src/mp/mp_trickle.c
+++ b/src/mp/mp_trickle.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/mutex/mut_alloc.c b/src/mutex/mut_alloc.c
index 5df3de53..06b3541e 100644
--- a/src/mutex/mut_alloc.c
+++ b/src/mutex/mut_alloc.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,9 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/log.h"
+
+static char *__mutex_action_print __P((MUTEX_ACTION));
 
 /*
  * __mutex_alloc --
@@ -35,8 +38,7 @@ __mutex_alloc(env, alloc_id, flags, indxp)
 	if (alloc_id != MTX_APPLICATION && alloc_id != MTX_MUTEX_TEST &&
 	    (F_ISSET(env->dbenv, DB_ENV_NOLOCKING) ||
 	    (!F_ISSET(env, ENV_THREAD) &&
-	    (LF_ISSET(DB_MUTEX_PROCESS_ONLY) ||
-	    F_ISSET(env, ENV_PRIVATE)))))
+	    (LF_ISSET(DB_MUTEX_PROCESS_ONLY) || F_ISSET(env, ENV_PRIVATE)))))
 		return (0);
 
 	/* Private environments never share mutexes. */
@@ -109,13 +111,17 @@ nomem:			__db_errx(env, DB_STR("2034",
 		    mtxregion->stat.st_mutex_max)
 			cnt = mtxregion->stat.st_mutex_max -
 			    mtxregion->stat.st_mutex_cnt;
+
+		/* Set i to the first newly created db_mutex_t. */
 		if (F_ISSET(env, ENV_PRIVATE)) {
 			F_SET(&mtxmgr->reginfo, REGION_TRACKED);
 			while (__env_alloc(&mtxmgr->reginfo,
 			    (cnt * mtxregion->mutex_size) +
-			    mtxregion->stat.st_mutex_align, &i) != 0)
-				if ((cnt >> 1) == 0)
+			    mtxregion->stat.st_mutex_align, &i) != 0) {
+				cnt >>= 1;
+				if (cnt == 0)
 					break;
+			}
 			F_CLR(&mtxmgr->reginfo, REGION_TRACKED);
 			i = (db_mutex_t)ALIGNP_INC(i,
 			    mtxregion->stat.st_mutex_align);
@@ -130,21 +136,16 @@ nomem:			__db_errx(env, DB_STR("2034",
 		}
 		if (cnt == 0)
 			goto nomem;
-		mutexp = MUTEXP_SET(env, i);
+
 		mtxregion->stat.st_mutex_free = cnt;
 		mtxregion->mutex_next = i;
 		mtxregion->stat.st_mutex_cnt += cnt;
-		while (--cnt > 0) {
-			mutexp->flags = 0;
-			if (F_ISSET(env, ENV_PRIVATE))
-				mutexp->mutex_next_link =
-				    (uintptr_t)(mutexp + 1);
-			else
-				mutexp->mutex_next_link = ++i;
-			mutexp++;
-		}
-		mutexp->flags = 0;
-		mutexp->mutex_next_link = MUTEX_INVALID;
+
+		/*
+		 * Now link the rest of the newly allocated db_mutex_t's into
+		 * the free list.
+		 */
+		MUTEX_BULK_INIT(env, mtxregion, i, cnt);
 	}
 
 	*indxp = mtxregion->mutex_next;
@@ -158,14 +159,12 @@ nomem:			__db_errx(env, DB_STR("2034",
 	if (mtxregion->stat.st_mutex_inuse > mtxregion->stat.st_mutex_inuse_max)
 		mtxregion->stat.st_mutex_inuse_max =
 		    mtxregion->stat.st_mutex_inuse;
-	if (locksys)
-		MUTEX_SYSTEM_UNLOCK(env);
 
 	/* Initialize the mutex. */
 	memset(mutexp, 0, sizeof(*mutexp));
 	F_SET(mutexp, DB_MUTEX_ALLOCATED |
-	    LF_ISSET(DB_MUTEX_LOGICAL_LOCK |
-		DB_MUTEX_PROCESS_ONLY | DB_MUTEX_SHARED));
+	    LF_ISSET(DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_PROCESS_ONLY |
+		DB_MUTEX_SELF_BLOCK | DB_MUTEX_SHARED));
 
 	/*
 	 * If the mutex is associated with a single process, set the process
@@ -182,7 +181,9 @@ nomem:			__db_errx(env, DB_STR("2034",
 #endif
 
 	if ((ret = __mutex_init(env, *indxp, flags)) != 0)
-		(void)__mutex_free_int(env, locksys, indxp);
+		(void)__mutex_free_int(env, 0, indxp);
+	if (locksys)
+		MUTEX_SYSTEM_UNLOCK(env);
 
 	return (ret);
 }
@@ -262,6 +263,44 @@ __mutex_free_int(env, locksys, indxp)
 	return (ret);
 }
 
+#ifdef HAVE_FAILCHK_BROADCAST
+/*
+ * __mutex_died --
+ *	Announce that a mutex request couldn't be granted because the last
+ *	thread to own it was killed by failchk. Sets ENV_DEAD_MUTEX in the
+ *	possibly shared environment so that mutex unlock calls don't complain.
+ *
+ *
+ * PUBLIC: int __mutex_died __P((ENV *, db_mutex_t));
+ */
+int
+__mutex_died(env, mutex)
+	ENV *env;
+	db_mutex_t mutex;
+{
+	DB_ENV *dbenv;
+	DB_EVENT_MUTEX_DIED_INFO info;
+	DB_MUTEX *mutexp;
+	char tidstr[DB_THREADID_STRLEN], failmsg[DB_FAILURE_SYMPTOM_SIZE];
+
+	dbenv = env->dbenv;
+
+	mutexp = MUTEXP_SET(env, mutex);
+	info.mutex = mutex;
+	info.pid = mutexp->pid;
+	info.tid = mutexp->tid;
+	(void)dbenv->thread_id_string(dbenv, mutexp->pid, mutexp->tid, tidstr);
+	(void)__mutex_describe(env, mutex, info.desc);
+	(void)snprintf(failmsg, sizeof(failmsg), DB_STR_A("2073",
+	    "Mutex died: %s owned %s", "%s %s"), tidstr, info.desc);
+	__db_errx(env, "%s", failmsg);
+	/* If this is the first crashed process, save its description. */
+	(void)__env_failure_remember(env, failmsg);
+	DB_EVENT(env, DB_EVENT_MUTEX_DIED, &info);
+	return (__env_panic(env, USR_ERR(env, DB_RUNRECOVERY)));
+}
+#endif
+
 /*
  * __mutex_refresh --
  *	Reinitialize a mutex, if we are not sure of its state.
@@ -289,3 +328,154 @@ __mutex_refresh(env, mutex)
 	}
 	return (ret);
 }
+
+/*
+ * __mutex_record_lock --
+ *	Record that this thread is about to lock a latch.
+ *	The last parameter is updated to point to this mutex's entry in the
+ *	per-thread mutex state array, so that it can update it if it gets the
+ *	mutex, or free it if the mutex is not acquired (e.g. it times out).
+ *	Mutexes which can be unlocked by other threads are not placed in this
+ *	list, because it would be too costly for that other thread to find
+ *	the right slot to clear. The caller has already checked that thread
+ *	tracking is enabled.
+ *
+ * PUBLIC: int __mutex_record_lock
+ * PUBLIC:     __P((ENV *, db_mutex_t, MUTEX_ACTION, MUTEX_STATE **));
+ */
+int
+__mutex_record_lock(env, mutex, action, retp)
+	ENV *env;
+	db_mutex_t mutex;
+	MUTEX_ACTION action;
+	MUTEX_STATE **retp;
+{
+	DB_MUTEX *mutexp;
+	DB_THREAD_INFO *ip;
+	int i, ret;
+
+	*retp = NULL;
+	mutexp = MUTEXP_SET(env, mutex);
+	if (!F_ISSET(mutexp, DB_MUTEX_SHARED))
+		return (0);
+	if ((ret = __env_set_state(env, &ip, THREAD_VERIFY)) != 0)
+		return (ret);
+	for (i = 0; i != MUTEX_STATE_MAX; i++) {
+		if (ip->dbth_latches[i].action == MUTEX_ACTION_UNLOCKED) {
+			ip->dbth_latches[i].mutex = mutex;
+			ip->dbth_latches[i].action = action;
+#ifdef DIAGNOSTIC
+			__os_gettime(env, &ip->dbth_latches[i].when, 0);
+#endif
+			*retp = &ip->dbth_latches[i];
+			return (0);
+		}
+	}
+	__db_errx(env, DB_STR_A("2074",
+	    "No space available in latch table for %lu", "%lu"), (u_long)mutex);
+	(void)__mutex_record_print(env, ip);
+	return (__env_panic(env, USR_ERR(env, DB_RUNRECOVERY)));
+}
+
+/*
+ * __mutex_record_unlock --
+ *	Verify that this thread owns the mutex it is about to unlock.
+ *
+ * PUBLIC: int __mutex_record_unlock __P((ENV *, db_mutex_t));
+ */
+int
+__mutex_record_unlock(env, mutex)
+	ENV *env;
+	db_mutex_t mutex;
+{
+	DB_MUTEX *mutexp;
+	DB_THREAD_INFO *ip;
+	int i, ret;
+
+	if (env->thr_hashtab == NULL)
+		return (0);
+	mutexp = MUTEXP_SET(env, mutex);
+	if (!F_ISSET(mutexp, DB_MUTEX_SHARED))
+		return (0);
+	if ((ret = __env_set_state(env, &ip, THREAD_VERIFY)) != 0)
+		return (ret);
+	for (i = 0; i != MUTEX_STATE_MAX; i++) {
+		if (ip->dbth_latches[i].mutex == mutex &&
+		    ip->dbth_latches[i].action != MUTEX_ACTION_UNLOCKED) {
+			ip->dbth_latches[i].action = MUTEX_ACTION_UNLOCKED;
+			return (0);
+		}
+	}
+	(void)__mutex_record_print(env, ip);
+	if (ip->dbth_state == THREAD_FAILCHK) {
+		DB_DEBUG_MSG(env, "mutex_record_unlock %lu by failchk thread",
+		    (u_long)mutex);
+		return (0);
+	}
+	__db_errx(env, DB_STR_A("2075",
+	    "Latch %lu was not held", "%lu"), (u_long)mutex);
+	return (__env_panic(env, USR_ERR(env, DB_RUNRECOVERY)));
+}
+
+static char *
+__mutex_action_print(action)
+	MUTEX_ACTION action;
+{
+	switch (action) {
+	case MUTEX_ACTION_UNLOCKED:
+		return ("unlocked");
+	case MUTEX_ACTION_INTEND_SHARE:
+		return ("waiting to share");
+	case MUTEX_ACTION_SHARED:
+		return ("sharing");
+	default:
+		return ("unknown");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * __mutex_record_print --
+ *	Display the thread's mutex state via __db_msg(), including any
+ *	information which would be relevant for db_stat or diagnostic messages.
+ *
+ * PUBLIC: int __mutex_record_print __P((ENV *, DB_THREAD_INFO *));
+ */
+int
+__mutex_record_print(env, ip)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+{
+	DB_MSGBUF mb, *mbp;
+	db_mutex_t mutex;
+	int i;
+	char desc[DB_MUTEX_DESCRIBE_STRLEN];
+	char time_buf[CTIME_BUFLEN];
+
+	DB_MSGBUF_INIT(&mb);
+	mbp = &mb;
+	for (i = 0; i != MUTEX_STATE_MAX; i++) {
+		if (ip->dbth_latches[i].action == MUTEX_ACTION_UNLOCKED)
+			continue;
+		if ((mutex = ip->dbth_latches[i].mutex) ==
+		    MUTEX_INVALID)
+			continue;
+		time_buf[4] = '\0';
+#ifdef DIAGNOSTIC
+		if (timespecisset(&ip->dbth_latches[i].when))
+			(void)__db_ctimespec(&ip->dbth_latches[i].when,
+			    time_buf);
+		else
+#endif
+			time_buf[0] = '\0';
+
+		__db_msgadd(env, mbp, "%s %s %s ",
+		    __mutex_describe(env, mutex, desc),
+		    __mutex_action_print(ip->dbth_latches[i].action), time_buf);
+#ifdef HAVE_STATISTICS
+		__mutex_print_debug_stats(env, mbp, mutex, 0);
+#endif
+		DB_MSGBUF_FLUSH(env, mbp);
+	}
+	return (0);
+}
diff --git a/src/mutex/mut_failchk.c b/src/mutex/mut_failchk.c
index 1425389f..28e5d992 100644
--- a/src/mutex/mut_failchk.c
+++ b/src/mutex/mut_failchk.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,68 +9,193 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/lock.h"
+
+static int __mutex_failchk_single __P((ENV *, db_mutex_t, DB_THREAD_INFO *));
 
 /*
- * __mut_failchk --
- *	Check for mutexes held by dead processes.
+ * __mutex_failchk --
+ *	Clean up after dead processes which left behind allocated per-process or
+ *	locked mutexes. Returns 0 if no check failed, else DB_RUNRECOVERY.
  *
- * PUBLIC: int __mut_failchk __P((ENV *));
+ * PUBLIC: int __mutex_failchk __P((ENV *));
  */
 int
-__mut_failchk(env)
+__mutex_failchk(env)
 	ENV *env;
 {
-	DB_ENV *dbenv;
-	DB_MUTEX *mutexp;
+	DB_HASHTAB *htab;
 	DB_MUTEXMGR *mtxmgr;
 	DB_MUTEXREGION *mtxregion;
-	db_mutex_t i;
-	int ret;
-	char buf[DB_THREADID_STRLEN];
-	db_threadid_t unused;
+	DB_THREAD_INFO *ip;
+	db_mutex_t mutex;
+	unsigned i;
+	int count;
 
-	if (F_ISSET(env, ENV_PRIVATE))
+	if (F_ISSET(env, ENV_PRIVATE) || (htab = env->thr_hashtab) == NULL)
 		return (0);
 
-	DB_THREADID_INIT(unused);
-
-	dbenv = env->dbenv;
 	mtxmgr = env->mutex_handle;
 	mtxregion = mtxmgr->reginfo.primary;
-	ret = 0;
+	count = 0;
 
+	DB_ASSERT(env, F_ISSET(env->dbenv, DB_ENV_FAILCHK));
 	MUTEX_SYSTEM_LOCK(env);
-	for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i, ++mutexp) {
-		mutexp = MUTEXP_SET(env, i);
 
-		/*
-		 * We're looking for per-process mutexes where the process
-		 * has died.
-		 */
-		if (!F_ISSET(mutexp, DB_MUTEX_ALLOCATED) ||
-		    !F_ISSET(mutexp, DB_MUTEX_PROCESS_ONLY))
+	/*
+	 * The first loop checks the latches recorded by each in-use thread;
+	 * the second checks every mutex. Each failure is tallied in count.
+	 */
+	for (i = 0; i < env->thr_nbucket; i++)
+		SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+			if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE)
+				continue;
+			count += __mutex_failchk_thread(env, ip);
+		}
+
+	for (mutex = 1; mutex <= mtxregion->stat.st_mutex_cnt; mutex++)
+		if (__mutex_failchk_single(env, mutex, NULL) != 0)
+			count++;
+
+	MUTEX_SYSTEM_UNLOCK(env);
+
+	if (count == 0)
+		return (count);
+	else
+		return (USR_ERR(env, DB_RUNRECOVERY));
+}
+
+/*
+ * __mutex_failchk_thread --
+ *	Failchk each latch this thread has recorded; returns how many failed.
+ *
+ * PUBLIC: int __mutex_failchk_thread __P((ENV *, DB_THREAD_INFO *));
+ */
+int
+__mutex_failchk_thread(env, ip)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+{
+	db_mutex_t mutex;
+	int count, i;
+
+	count = 0;
+	for (i = 0; i != MUTEX_STATE_MAX; i++) {
+		if (ip->dbth_latches[i].action == MUTEX_ACTION_UNLOCKED ||
+		    (mutex = ip->dbth_latches[i].mutex) == MUTEX_INVALID)
 			continue;
+		if (__mutex_failchk_single(env, mutex, ip) != 0)
+			count++;
+	}
+	return (count);
+}
 
+/*
+ * __mutex_failchk_single --
+ *	Check one mutex for a dead owner; returns 0 if there is nothing to do.
+ *	A dead process's process-only mutex is freed. Any other dead owner gets
+ *	DB_RUNRECOVERY, first set OWNER_DEAD under HAVE_FAILCHK_BROADCAST.
+ */
+static int
+__mutex_failchk_single(env, mutex, ip)
+	ENV *env;
+	db_mutex_t mutex;
+	DB_THREAD_INFO *ip;
+{
+	DB_ENV *dbenv;
+	DB_MUTEX *mutexp;
+	db_threadid_t threadid;
+	pid_t pid;
+	int already_dead, ret;
+	u_int32_t flags;
+	char id_str[DB_THREADID_STRLEN];
+	char mtx_desc[DB_MUTEX_DESCRIBE_STRLEN];
+
+	dbenv = env->dbenv;
+	mutexp = MUTEXP_SET(env, mutex);
+	flags = mutexp->flags;
+	/*
+	 * Filter out mutexes which couldn't possibly be "interesting", in order
+	 * to reduce the number of possibly costly is_alive() calls. Check that:
+	 *	it is allocated
+	 *	it is either locked, or a shared latch, or a per-process mutex
+	 *	it is neither a logical lock, nor self-block, nor already dead.
+	 * Self-blocking mutexes are skipped because it is expected that they
+	 * can still be locked even though they are really 'idle', as with
+	 * the wait case in __lock_get_internal(), LOG->free_commits, and
+	 * __rep_waiter->mtx_repwait; or they were allocated by the application.
+	 */
+	if (!LF_ISSET(DB_MUTEX_ALLOCATED))
+		return (0);
+	if (!LF_ISSET(
+	    DB_MUTEX_SHARED | DB_MUTEX_LOCKED | DB_MUTEX_PROCESS_ONLY))
+		return (0);
+	if (LF_ISSET(
+	    DB_MUTEX_SELF_BLOCK | DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_OWNER_DEAD))
+		return (0);
+
+	already_dead = ip != NULL && timespecisset(&ip->dbth_failtime);
+	/*
+	 * The pid in the mutex is valid for locked or per-process mutexes.
+	 * The tid is correct only when exclusively locked. It's okay to look at
+	 * the tid of an unlocked per-process mutex, we won't use it in the
+	 * is_alive() call.
+	 */
+	if (LF_ISSET(DB_MUTEX_LOCKED | DB_MUTEX_PROCESS_ONLY)) {
+		pid = mutexp->pid;
+		threadid = mutexp->tid;
+	} else {
+		DB_ASSERT(env, LF_ISSET(DB_MUTEX_SHARED));
 		/*
-		 * The thread that allocated the mutex may have exited, but
-		 * we cannot reclaim the mutex if the process is still alive.
+		 * If we get here with no thread, then this is a shared latch
+		 * which is neither locked nor shared, we're done with it.
 		 */
-		if (dbenv->is_alive(
-		    dbenv, mutexp->pid, unused, DB_MUTEX_PROCESS_ONLY))
-			continue;
+		if (ip == NULL)
+			return (0);
+		pid = ip->dbth_pid;
+		threadid = ip->dbth_tid;
+	}
+	if (!already_dead && dbenv->is_alive(dbenv,
+	    pid, threadid, LF_ISSET(DB_MUTEX_PROCESS_ONLY)))
+		return (0);
+
+	/* The thread is dead; the mutex type indicates the kind of cleanup. */
+	(void)dbenv->thread_id_string(dbenv, pid, threadid, id_str);
+	(void)__mutex_describe(env, mutex, mtx_desc);
 
-		__db_msg(env, DB_STR_A("2017",
-		    "Freeing mutex for process: %s", "%s"),
-		    dbenv->thread_id_string(dbenv, mutexp->pid, unused, buf));
+	if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
+		if (already_dead)
+			return (0);
+
+		__db_errx(env, DB_STR_A("2065",
+		    "Freeing %s for process: %s", "%s %s"), mtx_desc, id_str);
+
+		/* Clear the mutex id if it is in a cached locker. */
+		if ((ret = __lock_local_locker_invalidate(env, mutex)) != 0)
+			return (ret);
 
 		/* Unlock and free the mutex. */
-		if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
-			MUTEX_UNLOCK(env, i);
+		if (LF_ISSET(DB_MUTEX_LOCKED))
+			MUTEX_UNLOCK(env, mutex);
 
-		if ((ret = __mutex_free_int(env, 0, &i)) != 0)
-			break;
+		return (__mutex_free_int(env, 0, &mutex));
 	}
-	MUTEX_SYSTEM_UNLOCK(env);
-
-	return (ret);
+#ifdef HAVE_FAILCHK_BROADCAST
+	else if (LF_ISSET(DB_MUTEX_LOCKED)) {
+		__db_errx(env, DB_STR_A("2066",
+		    "Marking %s as owned by dead thread %s", "%s %s"),
+		    mtx_desc, id_str);
+		F_SET(mutexp, DB_MUTEX_OWNER_DEAD);
+	} else if (LF_ISSET(DB_MUTEX_SHARED)) {
+		__db_errx(env, DB_STR_A("2067",
+		    "Marking %s as shared by dead thread %s", "%s %s"),
+		    mtx_desc, id_str);
+		F_SET(mutexp, DB_MUTEX_OWNER_DEAD);
+	} else {
+		__db_errx(env, DB_STR_A("2068",
+	"mutex_failchk: unknown state for %s with dead thread %s", "%s %s"),
+		    mtx_desc, id_str);
+	}
+#endif
+	return (USR_ERR(env, DB_RUNRECOVERY));
 }
diff --git a/src/mutex/mut_fcntl.c b/src/mutex/mut_fcntl.c
deleted file mode 100644
index 0694aa59..00000000
--- a/src/mutex/mut_fcntl.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-
-static inline int __db_fcntl_mutex_lock_int
-	    __P((ENV *, db_mutex_t, db_timeout_t, int));
-
-/*
- * __db_fcntl_mutex_init --
- *	Initialize a fcntl mutex.
- *
- * PUBLIC: int __db_fcntl_mutex_init __P((ENV *, db_mutex_t, u_int32_t));
- */
-int
-__db_fcntl_mutex_init(env, mutex, flags)
-	ENV *env;
-	db_mutex_t mutex;
-	u_int32_t flags;
-{
-	COMPQUIET(env, NULL);
-	COMPQUIET(mutex, MUTEX_INVALID);
-	COMPQUIET(flags, 0);
-
-	return (0);
-}
-
-/*
- * __db_fcntl_mutex_lock_int
- *	Internal function to lock a mutex, blocking only when requested
- */
-inline int
-__db_fcntl_mutex_lock_int(env, mutex, timeout, wait)
-	ENV *env;
-	db_mutex_t mutex;
-	db_timeout_t timeout;
-	int wait;
-{
-	DB_ENV *dbenv;
-	DB_MUTEX *mutexp;
-	DB_THREAD_INFO *ip;
-	struct flock k_lock;
-	int locked, ms, ret;
-	db_timespec now, timespec;
-	db_timeout_t time_left;
-
-	dbenv = env->dbenv;
-
-	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
-		return (0);
-
-	mutexp = MUTEXP_SET(env, mutex);
-
-	CHECK_MTX_THREAD(env, mutexp);
-
-#ifdef HAVE_STATISTICS
-	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
-		++mutexp->mutex_set_wait;
-	else
-		++mutexp->mutex_set_nowait;
-#endif
-
-	/* Initialize the lock. */
-	k_lock.l_whence = SEEK_SET;
-	k_lock.l_start = mutex;
-	k_lock.l_len = 1;
-
-	if (timeout != 0) {
-		timespecclear(&timespec);
-		__clock_set_expires(env, &timespec, timeout);
-	}
-
-	/*
-	 * Only check the thread state once, by initializing the thread
-	 * control block pointer to null.  If it is not the failchk
-	 * thread, then ip will have a valid value subsequent times
-	 * in the loop.
-	 */
-	ip = NULL;
-
-	for (locked = 0;;) {
-		/*
-		 * Wait for the lock to become available; wait 1ms initially,
-		 * up to 1 second.
-		 */
-		for (ms = 1; F_ISSET(mutexp, DB_MUTEX_LOCKED);) {
-			if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
-			    ip == NULL && dbenv->is_alive(dbenv,
-			    mutexp->pid, mutexp->tid, 0) == 0) {
-				ret = __env_set_state(env, &ip, THREAD_VERIFY);
-				if (ret != 0 ||
-				    ip->dbth_state == THREAD_FAILCHK)
-					return (DB_RUNRECOVERY);
-			}
-			if (!wait)
-				return (DB_LOCK_NOTGRANTED);
-			if (timeout != 0) {
-				timespecclear(&now);
-				if (__clock_expired(env, &now, &timespec))
-					return (DB_TIMEOUT);
-				DB_TIMESPEC_TO_TIMEOUT(time_left, &now, 0);
-				time_left = timeout - time_left;
-				if (ms * US_PER_MS > time_left)
-					ms = time_left / US_PER_MS;
-			}
-			__os_yield(NULL, 0, ms * US_PER_MS);
-			if ((ms <<= 1) > MS_PER_SEC)
-				ms = MS_PER_SEC;
-		}
-
-		/* Acquire an exclusive kernel lock on the byte. */
-		k_lock.l_type = F_WRLCK;
-		if (fcntl(env->lockfhp->fd, F_SETLKW, &k_lock))
-			goto err;
-
-		/* If the resource is still available, it's ours. */
-		if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
-			locked = 1;
-
-			F_SET(mutexp, DB_MUTEX_LOCKED);
-			dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
-		}
-
-		/* Release the kernel lock. */
-		k_lock.l_type = F_UNLCK;
-		if (fcntl(env->lockfhp->fd, F_SETLK, &k_lock))
-			goto err;
-
-		/*
-		 * If we got the resource lock we're done.
-		 *
-		 * !!!
-		 * We can't check to see if the lock is ours, because we may
-		 * be trying to block ourselves in the lock manager, and so
-		 * the holder of the lock that's preventing us from getting
-		 * the lock may be us!  (Seriously.)
-		 */
-		if (locked)
-			break;
-	}
-
-#ifdef DIAGNOSTIC
-	/*
-	 * We want to switch threads as often as possible.  Yield every time
-	 * we get a mutex to ensure contention.
-	 */
-	if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
-		__os_yield(env, 0, 0);
-#endif
-	return (0);
-
-err:	ret = __os_get_syserr();
-	__db_syserr(env, ret, DB_STR("2019", "fcntl lock failed"));
-	return (__env_panic(env, __os_posix_err(ret)));
-}
-
-/*
- * __db_fcntl_mutex_lock
- *	Lock a mutex, blocking if necessary.
- *
- * PUBLIC: int __db_fcntl_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
- */
-int
-__db_fcntl_mutex_lock(env, mutex, timeout)
-	ENV *env;
-	db_mutex_t mutex;
-	db_timeout_t timeout;
-{
-	return (__db_fcntl_mutex_lock_int(env, mutex, timeout, 1));
-}
-
-/*
- * __db_fcntl_mutex_trylock
- *	Try to lock a mutex, without blocking when it is busy.
- *
- * PUBLIC: int __db_fcntl_mutex_trylock __P((ENV *, db_mutex_t));
- */
-int
-__db_fcntl_mutex_trylock(env, mutex)
-	ENV *env;
-	db_mutex_t mutex;
-{
-	return (__db_fcntl_mutex_lock_int(env, mutex, 0, 0));
-}
-
-/*
- * __db_fcntl_mutex_unlock --
- *	Release a mutex.
- *
- * PUBLIC: int __db_fcntl_mutex_unlock __P((ENV *, db_mutex_t));
- */
-int
-__db_fcntl_mutex_unlock(env, mutex)
-	ENV *env;
-	db_mutex_t mutex;
-{
-	DB_ENV *dbenv;
-	DB_MUTEX *mutexp;
-
-	dbenv = env->dbenv;
-
-	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
-		return (0);
-
-	mutexp = MUTEXP_SET(env, mutex);
-
-#ifdef DIAGNOSTIC
-	if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
-		__db_errx(env, DB_STR("2020",
-		    "fcntl unlock failed: lock already unlocked"));
-		return (__env_panic(env, EACCES));
-	}
-#endif
-
-	/*
-	 * Release the resource.  We don't have to acquire any locks because
-	 * processes trying to acquire the lock are waiting for the flag to
-	 * go to 0.  Once that happens the waiters will serialize acquiring
-	 * an exclusive kernel lock before locking the mutex.
-	 */
-	F_CLR(mutexp, DB_MUTEX_LOCKED);
-
-	return (0);
-}
-
-/*
- * __db_fcntl_mutex_destroy --
- *	Destroy a mutex.
- *
- * PUBLIC: int __db_fcntl_mutex_destroy __P((ENV *, db_mutex_t));
- */
-int
-__db_fcntl_mutex_destroy(env, mutex)
-	ENV *env;
-	db_mutex_t mutex;
-{
-	COMPQUIET(env, NULL);
-	COMPQUIET(mutex, MUTEX_INVALID);
-
-	return (0);
-}
diff --git a/src/mutex/mut_method.c b/src/mutex/mut_method.c
index cb666082..99bafeae 100644
--- a/src/mutex/mut_method.c
+++ b/src/mutex/mut_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -371,6 +371,33 @@ __mutex_set_tas_spins(dbenv, tas_spins)
 	return (0);
 }
 
+#ifdef HAVE_ERROR_HISTORY
+/*
+ * __mutex_diags --
+ *	Note the mutex in the deferred message buffer, if any; returns error.
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: int __mutex_diags __P((ENV *, db_mutex_t, int));
+ * PUBLIC: #endif
+ */
+int
+__mutex_diags(env, mutex, error)
+	ENV *env;
+	db_mutex_t mutex;
+	int error;
+{
+	DB_MSGBUF *deferred;
+
+	if ((deferred = __db_deferred_get()) == NULL)
+		return (error);
+	(void)__db_remember_context(env, deferred, error);
+	__db_msgadd(env, deferred, "Mutex %u ", (unsigned int)mutex);
+#ifdef HAVE_STATISTICS
+	__mutex_print_debug_stats(env, deferred, mutex, 0);
+#endif
+	return (error);
+}
+#endif
+
 #if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT)
 /*
  * Provide atomic operations for platforms which have mutexes yet do not have
diff --git a/src/mutex/mut_pthread.c b/src/mutex/mut_pthread.c
index 1ec4fb9c..4b2cfb81 100644
--- a/src/mutex/mut_pthread.c
+++ b/src/mutex/mut_pthread.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -64,6 +64,19 @@
 } while (0)
 
 /*
+ * !!!
+ * Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME  -- out
+ * of sheer paranoia, check both ETIME and ETIMEDOUT.  We believe this happens
+ * when the application uses SIGALRM for some purpose, e.g., the C library sleep
+ * call, and Solaris delivers the signal to the wrong LWP.
+ */
+#ifdef ETIME
+#define	ETIME_TO_ETIMEDOUT(ret)	((ret) == ETIME ? ETIMEDOUT : (ret))
+#else
+#define	ETIME_TO_ETIMEDOUT(ret)	(ret)
+#endif
+
+/*
  * __db_pthread_mutex_init --
  *	Initialize a pthread mutex: either a native one or
  *	just the mutex for block/wakeup of a hybrid test-and-set mutex
@@ -104,18 +117,18 @@ __db_pthread_mutex_init(env, mutex, flags)
 		pthread_rwlockattr_t rwlockattr, *rwlockattrp = NULL;
 #ifndef HAVE_MUTEX_THREAD_ONLY
 		if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
-			RET_SET((pthread_rwlockattr_init(&rwlockattr)), ret);
+			RET_SET(pthread_rwlockattr_init(&rwlockattr), ret);
 			if (ret != 0)
 				goto err;
-			RET_SET((pthread_rwlockattr_setpshared(
-			    &rwlockattr, PTHREAD_PROCESS_SHARED)), ret);
+			RET_SET(pthread_rwlockattr_setpshared(
+			    &rwlockattr, PTHREAD_PROCESS_SHARED), ret);
 			rwlockattrp = &rwlockattr;
 		}
 #endif
 
 		if (ret == 0)
-			RET_SET((pthread_rwlock_init(&mutexp->u.rwlock,
-			    rwlockattrp)), ret);
+			RET_SET(pthread_rwlock_init(&mutexp->u.rwlock,
+			    rwlockattrp), ret);
 		if (rwlockattrp != NULL)
 			(void)pthread_rwlockattr_destroy(rwlockattrp);
 
@@ -127,18 +140,18 @@ __db_pthread_mutex_init(env, mutex, flags)
 #endif
 #ifndef HAVE_MUTEX_THREAD_ONLY
 	if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
-		RET_SET((pthread_mutexattr_init(&mutexattr)), ret);
+		RET_SET(pthread_mutexattr_init(&mutexattr), ret);
 		if (ret != 0)
 			goto err;
-		RET_SET((pthread_mutexattr_setpshared(
-		    &mutexattr, PTHREAD_PROCESS_SHARED)), ret);
+		RET_SET(pthread_mutexattr_setpshared(
+		    &mutexattr, PTHREAD_PROCESS_SHARED), ret);
 		mutexattrp = &mutexattr;
 	}
 #endif
 
 	if (ret == 0)
 		RET_SET(
-		    (pthread_mutex_init(&mutexp->u.m.mutex, mutexattrp)), ret);
+		    pthread_mutex_init(&mutexp->u.m.mutex, mutexattrp), ret);
 
 	if (mutexattrp != NULL)
 		(void)pthread_mutexattr_destroy(mutexattrp);
@@ -147,19 +160,19 @@ __db_pthread_mutex_init(env, mutex, flags)
 	if (LF_ISSET(DB_MUTEX_SELF_BLOCK)) {
 #ifndef HAVE_MUTEX_THREAD_ONLY
 		if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
-			RET_SET((pthread_condattr_init(&condattr)), ret);
+			RET_SET(pthread_condattr_init(&condattr), ret);
 			if (ret != 0)
 				goto err;
 
 			condattrp = &condattr;
-			RET_SET((pthread_condattr_setpshared(
-			    &condattr, PTHREAD_PROCESS_SHARED)), ret);
+			RET_SET(pthread_condattr_setpshared(
+			    &condattr, PTHREAD_PROCESS_SHARED), ret);
 		}
 #endif
 
 		if (ret == 0)
-			RET_SET((pthread_cond_init(
-			    &mutexp->u.m.cond, condattrp)), ret);
+			RET_SET(pthread_cond_init(
+			    &mutexp->u.m.cond, condattrp), ret);
 
 		F_SET(mutexp, DB_MUTEX_SELF_BLOCK);
 		if (condattrp != NULL)
@@ -239,6 +252,9 @@ __db_pthread_mutex_prep(env, mutex, mutexp, exclusive)
 {
 	DB_ENV *dbenv;
 	DB_THREAD_INFO *ip;
+#ifdef HAVE_FAILCHK_BROADCAST
+	db_timespec timespec;
+#endif
 	int ret;
 
 	dbenv = env->dbenv;
@@ -266,13 +282,32 @@ __db_pthread_mutex_prep(env, mutex, mutexp, exclusive)
 					 * hadn't gone down the 'if
 					 * DB_ENV_FAILCHK' path to start with.
 					 */
-				    RET_SET_PTHREAD_LOCK(mutexp, ret);
-				    break;
+					goto lockit;
 				}
+				__os_yield(env, 0, 10);
 			}
 		}
-	} else
-		RET_SET_PTHREAD_LOCK(mutexp, ret);
+	} else {
+lockit:
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (dbenv->mutex_failchk_timeout != 0) {
+			timespecclear(&timespec);
+			__clock_set_expires(env,
+			    &timespec, dbenv->mutex_failchk_timeout);
+			do {
+				RET_SET_PTHREAD_TIMEDLOCK(mutexp,
+				    (struct timespec *)&timespec, ret);
+				ret = ETIME_TO_ETIMEDOUT(ret);
+				if (ret == ETIMEDOUT &&
+				    F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+				    !F_ISSET(dbenv, DB_ENV_FAILCHK))
+					ret = USR_ERR(env,
+					    __mutex_died(env, mutex));
+			} while  (ret == ETIMEDOUT);
+		} else
+#endif
+			RET_SET_PTHREAD_LOCK(mutexp, ret);
+	}
 
 	PERFMON4(env,
 	    mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp);
@@ -302,49 +337,75 @@ __db_pthread_mutex_condwait(env, mutex, mutexp, timespec)
 	DB_MUTEX *mutexp;
 	db_timespec *timespec;
 {
+	DB_ENV *dbenv;
 	int ret;
-
-#ifdef MUTEX_DIAG
-	printf("condwait %ld %x wait busy %x count %d\n",
-	    mutex, pthread_self(), MUTEXP_BUSY_FIELD(mutexp), mutexp->wait);
+#ifdef HAVE_FAILCHK_BROADCAST
+	db_timespec failchk_timespec;
 #endif
+
+	dbenv = env->dbenv;
 	PERFMON4(env, mutex, suspend, mutex, TRUE, mutexp->alloc_id, mutexp);
 
+#ifdef HAVE_FAILCHK_BROADCAST
+	/*
+	 * If the failchk timeout would expire sooner than the timeout argument
+	 * passed in, use the failchk timeout. The caller handles "short" waits.
+	 */
+	if (dbenv->mutex_failchk_timeout != 0) {
+		timespecclear(&failchk_timespec);
+		__clock_set_expires(env,
+		    &failchk_timespec, dbenv->mutex_failchk_timeout);
+		if (timespec == NULL ||
+		    timespeccmp(timespec, &failchk_timespec, >))
+			timespec = &failchk_timespec;
+	}
+#endif
+
 	if (timespec != NULL) {
-		RET_SET((pthread_cond_timedwait(&mutexp->u.m.cond,
-		    &mutexp->u.m.mutex, (struct timespec *) timespec)), ret);
+		RET_SET(pthread_cond_timedwait(&mutexp->u.m.cond,
+		    &mutexp->u.m.mutex, (struct timespec *) timespec), ret);
+		ret = ETIME_TO_ETIMEDOUT(ret);
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+		    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+			ret = USR_ERR(env, __mutex_died(env, mutex));
+			goto err;
+		}
+#endif
 		if (ret == ETIMEDOUT) {
 			ret = DB_TIMEOUT;
-			goto ret;
+			goto err;
 		}
 	} else
-		RET_SET((pthread_cond_wait(&mutexp->u.m.cond,
-		    &mutexp->u.m.mutex)), ret);
-#ifdef MUTEX_DIAG
-	printf("condwait %ld %x wait returns %d busy %x\n",
-	    mutex, pthread_self(), ret, MUTEXP_BUSY_FIELD(mutexp));
+		RET_SET(pthread_cond_wait(&mutexp->u.m.cond,
+		    &mutexp->u.m.mutex), ret);
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (ret == 0 && F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+	    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+		ret = USR_ERR(env, __mutex_died(env, mutex));
+		goto err;
+	}
 #endif
 	/*
 	 * !!!
 	 * Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME
-	 * -- out  of sheer paranoia, check both ETIME and ETIMEDOUT.  We
+	 * -- out of sheer paranoia, check both ETIME and ETIMEDOUT.  We
 	 * believe this happens when the application uses SIGALRM for some
 	 * purpose, e.g., the C library sleep call, and Solaris delivers the
-	 * signal to the wrong  LWP.
+	 * signal to the wrong LWP.
 	 */
 	if (ret != 0) {
-		if (ret == ETIMEDOUT ||
-#ifdef ETIME
-		    ret == ETIME ||
-#endif
+		if ((ret = ETIME_TO_ETIMEDOUT(ret)) == ETIMEDOUT ||
 		    ret == EINTR)
 			ret = 0;
-		else
+		else {
 			/* Failure, caller shouldn't condwait again. */
 			(void)pthread_mutex_unlock(&mutexp->u.m.mutex);
+			(void)MUTEX_ERR(env, mutex, ret);
+		}
 	}
 
-ret:
+err:
 	PERFMON4(env, mutex, resume, mutex, TRUE, mutexp->alloc_id, mutexp);
 
 	COMPQUIET(mutex, 0);
@@ -356,7 +417,10 @@ ret:
 /*
  * __db_pthread_mutex_lock
  *	Lock on a mutex, blocking if necessary.
- *	Timeouts are supported only for self-blocking mutexes.
+ *	Timeouts are supported only for self-blocking mutexes. When both a
+ *	given timeout and a dbenv-wide failchk timeout are specified, the
+ *	given timeout takes precedence -- a process failure might not be noticed
+ *	for a little while.
  *
  *	Self-blocking shared latches are not supported.
  *
@@ -372,6 +436,7 @@ __db_pthread_mutex_lock(env, mutex, timeout)
 {
 	DB_ENV *dbenv;
 	DB_MUTEX *mutexp;
+	db_timeout_t checktimeout;
 	db_timespec timespec;
 	int ret, t_ret;
 
@@ -385,7 +450,6 @@ __db_pthread_mutex_lock(env, mutex, timeout)
 
 	CHECK_MTX_THREAD(env, mutexp);
 
-#if defined(HAVE_STATISTICS)
 	/*
 	 * We want to know which mutexes are contentious, but don't want to
 	 * do an interlocked test here -- that's slower when the underlying
@@ -398,6 +462,11 @@ __db_pthread_mutex_lock(env, mutex, timeout)
 	else
 		STAT_INC(env,
 		    mutex, set_nowait, mutexp->mutex_set_nowait, mutex);
+
+	checktimeout = timeout;
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (checktimeout == 0 || checktimeout > dbenv->mutex_failchk_timeout)
+		checktimeout = dbenv->mutex_failchk_timeout;
 #endif
 
 	/* Single-thread the next block, except during the possible condwait. */
@@ -405,14 +474,12 @@ __db_pthread_mutex_lock(env, mutex, timeout)
 		goto err;
 
 	if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) {
-		if (timeout != 0)
+		if (checktimeout != 0)
 			timespecclear(&timespec);
 		while (MUTEXP_IS_BUSY(mutexp)) {
 			/* Set expiration timer upon first need. */
-			if (timeout != 0 && !timespecisset(&timespec)) {
-				timespecclear(&timespec);
+			if (checktimeout != 0 && !timespecisset(&timespec))
 				__clock_set_expires(env, &timespec, timeout);
-			}
 			t_ret = __db_pthread_mutex_condwait(env,
 			    mutex, mutexp, timeout == 0 ? NULL : &timespec);
 			if (t_ret != 0) {
@@ -428,18 +495,20 @@ __db_pthread_mutex_lock(env, mutex, timeout)
 out:
 		/* #2471: HP-UX can sporadically return EFAULT. See above */
 		RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret);
-		if (ret != 0)
+		if (ret != 0) {
+			(void)MUTEX_ERR(env, mutex, ret);
 			goto err;
+		}
 	} else {
 #ifdef DIAGNOSTIC
 		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
 			char buf[DB_THREADID_STRLEN];
 			(void)dbenv->thread_id_string(dbenv,
 			    mutexp->pid, mutexp->tid, buf);
+			ret = MUTEX_ERR(env, mutex, EINVAL);
 			__db_errx(env, DB_STR_A("2022",
 		    "pthread lock failed: lock currently in use: pid/tid: %s",
 			    "%s"), buf);
-			ret = EINVAL;
 			goto err;
 		}
 #endif
@@ -455,6 +524,13 @@ out:
 	if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
 		__os_yield(env, 0, 0);
 #endif
+#ifdef MUTEX_DIAG
+	if (t_ret == 0) {
+		__os_gettime(env, &mutexp->mutex_history.when, 0);
+		__os_stack_text(env, mutexp->mutex_history.stacktext,
+		    sizeof(mutexp->mutex_history.stacktext), 12, 2);
+	}
+#endif
 	return (t_ret);
 
 err:
@@ -479,6 +555,10 @@ __db_pthread_mutex_readlock(env, mutex)
 {
 	DB_ENV *dbenv;
 	DB_MUTEX *mutexp;
+	MUTEX_STATE *state;
+#ifdef HAVE_FAILCHK_BROADCAST
+	db_timespec timespec;
+#endif
 	int ret;
 
 	dbenv = env->dbenv;
@@ -491,7 +571,6 @@ __db_pthread_mutex_readlock(env, mutex)
 
 	CHECK_MTX_THREAD(env, mutexp);
 
-#if defined(HAVE_STATISTICS)
 	/*
 	 * We want to know which mutexes are contentious, but don't want to
 	 * do an interlocked test here -- that's slower when the underlying
@@ -505,15 +584,52 @@ __db_pthread_mutex_readlock(env, mutex)
 	else
 		STAT_INC(env,
 		    mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex);
-#endif
+
+	state = NULL;
+	if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
+	    mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
+		return (ret);
 
 	PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);
-	RET_SET((pthread_rwlock_rdlock(&mutexp->u.rwlock)), ret);
+
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (dbenv->mutex_failchk_timeout != 0) {
+		do {	/* Re-arm and retry after each failchk timeout. */
+			timespecclear(&timespec);
+			__clock_set_expires(env,
+			    &timespec, dbenv->mutex_failchk_timeout);
+			RET_SET(pthread_rwlock_timedrdlock(&mutexp->u.rwlock,
+			    (struct timespec *)&timespec), ret);
+			if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+			    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+				if (ret == 0)
+					RETRY_ON_EFAULT(pthread_rwlock_unlock(
+					    &mutexp->u.rwlock), ret);
+				ret = USR_ERR(env, __mutex_died(env, mutex));
+				goto err;
+			}
+		} while (ETIME_TO_ETIMEDOUT(ret) == ETIMEDOUT);
+	} else
+#endif
+		RET_SET(pthread_rwlock_rdlock(&mutexp->u.rwlock), ret);
+
 	PERFMON4(env, mutex, resume, mutex, FALSE, mutexp->alloc_id, mutexp);
 	DB_ASSERT(env, !F_ISSET(mutexp, DB_MUTEX_LOCKED));
 	if (ret != 0)
 		goto err;
 
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+	    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+		ret = USR_ERR(env, __mutex_died(env, mutex));
+		goto err;
+	}
+#endif
+#ifdef MUTEX_DIAG
+	__os_gettime(env, &mutexp->mutex_history.when, 0);
+	__os_stack_text(env, mutexp->mutex_history.stacktext,
+	    sizeof(mutexp->mutex_history.stacktext), 12, 2);
+#endif
 #ifdef DIAGNOSTIC
 	/*
 	 * We want to switch threads as often as possible.  Yield every time
@@ -524,7 +640,10 @@ __db_pthread_mutex_readlock(env, mutex)
 #endif
 	return (0);
 
-err:	__db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
+err:
+	if (state != NULL)
+		state->action = MUTEX_ACTION_UNLOCKED;
+	__db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
 	return (__env_panic(env, ret));
 }
 #endif
@@ -532,8 +651,10 @@ err:	__db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
 #ifdef HAVE_MUTEX_HYBRID
 /*
  * __db_hybrid_mutex_suspend
- *	Suspend this thread until the mutex is free enough to give the caller a
- *	good chance of getting the mutex in the requested exclusivity mode.
+ *	Suspend this thread, usually until the mutex is free enough to give the
+ *	caller a good chance of getting the mutex in the requested exclusivity
+ *	mode. Return early if the timeout is reached or the mutex's owner is
+ *	found to be dead.
  *
  *	The major difference between this and the old __db_pthread_mutex_lock()
  *	is the additional 'exclusive' parameter.
@@ -551,6 +672,9 @@ __db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
 	int exclusive;
 {
 	DB_MUTEX *mutexp;
+#ifdef HAVE_FAILCHECK_BROADCAST
+	db_timespec failchk_timespec;
+#endif
 	int ret, t_ret;
 
 	t_ret = 0;
@@ -571,7 +695,7 @@ __db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
 	 * before checking the wait counter.
 	 */
 	mutexp->wait++;
-	MUTEX_MEMBAR(mutexp->wait);
+	(void)MUTEX_MEMBAR(mutexp->wait);
 	while (exclusive ? MUTEXP_IS_BUSY(mutexp) :
 	    atomic_read(&mutexp->sharecount) == MUTEX_SHARE_ISEXCLUSIVE) {
 		t_ret = __db_pthread_mutex_condwait(env,
@@ -582,7 +706,7 @@ __db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
 			ret = t_ret;
 			goto err;
 		}
-		MUTEX_MEMBAR(mutexp->flags);
+		(void)MUTEX_MEMBAR(mutexp->flags);
 	}
 
 	mutexp->wait--;
@@ -627,8 +751,8 @@ __db_pthread_mutex_unlock(env, mutex)
 	DB_ENV *dbenv;
 	DB_MUTEX *mutexp;
 	int ret;
-#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID)
-	int waiters;
+#ifndef HAVE_MUTEX_HYBRID
+	char description[DB_MUTEX_DESCRIBE_STRLEN];
 #endif
 
 	dbenv = env->dbenv;
@@ -637,14 +761,13 @@ __db_pthread_mutex_unlock(env, mutex)
 		return (0);
 
 	mutexp = MUTEXP_SET(env, mutex);
-#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID)
-	waiters = mutexp->wait;
-#endif
 
-#if !defined(HAVE_MUTEX_HYBRID) && defined(DIAGNOSTIC)
+#if !defined(HAVE_MUTEX_HYBRID)
 	if (!F_ISSET(mutexp, DB_MUTEX_LOCKED | DB_MUTEX_SHARED)) {
-		__db_errx(env, DB_STR("2025",
-		    "pthread unlock failed: lock already unlocked"));
+		if (!PANIC_ISSET(env))
+			__db_errx(env, DB_STR("2069",
+			    "pthread unlock %s: already unlocked"),
+			    __mutex_describe(env, mutex, description));
 		return (__env_panic(env, EACCES));
 	}
 #endif
@@ -662,14 +785,19 @@ __db_pthread_mutex_unlock(env, mutex)
 
 		if (F_ISSET(mutexp, DB_MUTEX_SHARED))
 			RET_SET(
-			    (pthread_cond_broadcast(&mutexp->u.m.cond)), ret);
+			    pthread_cond_broadcast(&mutexp->u.m.cond), ret);
 		else
-			RET_SET((pthread_cond_signal(&mutexp->u.m.cond)), ret);
+			RET_SET(pthread_cond_signal(&mutexp->u.m.cond), ret);
 		if (ret != 0)
 			goto err;
 	} else {
 #ifndef HAVE_MUTEX_HYBRID
-		F_CLR(mutexp, DB_MUTEX_LOCKED);
+
+		if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
+			F_CLR(mutexp, DB_MUTEX_LOCKED);
+		else if (env->thr_hashtab != NULL &&
+		    (ret = __mutex_record_unlock(env, mutex)) != 0)
+		    	goto err;
 #endif
 	}
 
@@ -685,12 +813,6 @@ err:	if (ret != 0) {
 		__db_err(env, ret, "pthread unlock failed");
 		return (__env_panic(env, ret));
 	}
-#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID)
-	if (!MUTEXP_IS_BUSY(mutexp) && mutexp->wait != 0)
-		printf("unlock %ld %x busy %x waiters %d/%d\n",
-		    mutex, pthread_self(), ret,
-		    MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait);
-#endif
 	return (ret);
 }
 
@@ -739,7 +861,7 @@ __db_pthread_mutex_destroy(env, mutex)
 		if (!failchk_thread)
 #endif
 			RET_SET(
-			    (pthread_rwlock_destroy(&mutexp->u.rwlock)), ret);
+			    pthread_rwlock_destroy(&mutexp->u.rwlock), ret);
 		/* For rwlocks, we're done - must not destroy rest of union */
 		return (ret);
 #endif
@@ -754,15 +876,14 @@ __db_pthread_mutex_destroy(env, mutex)
 #ifdef HAVE_PTHREAD_COND_REINIT_OKAY
 		if (!failchk_thread)
 #endif
-			RET_SET((pthread_cond_destroy(&mutexp->u.m.cond)), ret);
+			RET_SET(pthread_cond_destroy(&mutexp->u.m.cond), ret);
 		if (ret != 0)
 			__db_err(env, ret, DB_STR("2026",
 			    "unable to destroy cond"));
 	}
-	RET_SET((pthread_mutex_destroy(&mutexp->u.m.mutex)), t_ret);
+	RET_SET(pthread_mutex_destroy(&mutexp->u.m.mutex), t_ret);
 	if (t_ret != 0 && !failchk_thread) {
-		__db_err(env, t_ret, DB_STR("2027",
-		    "unable to destroy mutex"));
+		__db_err(env, t_ret, DB_STR("2027", "unable to destroy mutex"));
 		if (ret == 0)
 			ret = t_ret;
 	}
diff --git a/src/mutex/mut_region.c b/src/mutex/mut_region.c
index 26ae0a03..976ff231 100644
--- a/src/mutex/mut_region.c
+++ b/src/mutex/mut_region.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -34,7 +34,7 @@ __mutex_open(env, create_ok)
 	DB_MUTEXMGR *mtxmgr;
 	DB_MUTEXREGION *mtxregion;
 	size_t size;
-	u_int32_t cpu_count;
+	u_int32_t cpu_count, tas_spins;
 	int ret;
 #ifndef HAVE_ATOMIC_SUPPORT
 	u_int i;
@@ -55,8 +55,14 @@ __mutex_open(env, create_ok)
 		dbenv->mutex_align = MUTEX_ALIGN;
 	if (dbenv->mutex_tas_spins == 0) {
 		cpu_count = __os_cpu_count();
-		if ((ret = __mutex_set_tas_spins(dbenv, cpu_count == 1 ?
-		    cpu_count : cpu_count * MUTEX_SPINS_PER_PROCESSOR)) != 0)
+		if (cpu_count == 1)
+			tas_spins = 1;
+		else {
+			tas_spins = cpu_count * MUTEX_SPINS_PER_PROCESSOR;
+			if (tas_spins > MUTEX_SPINS_DEFAULT_MAX)
+			    tas_spins = MUTEX_SPINS_DEFAULT_MAX;
+		}
+		if ((ret = __mutex_set_tas_spins(dbenv, tas_spins)) != 0)
 			return (ret);
 	}
 
@@ -118,11 +124,29 @@ __mutex_open(env, create_ok)
 
 	return (0);
 
-err:	env->mutex_handle = NULL;
-	if (mtxmgr->reginfo.addr != NULL)
-		(void)__env_region_detach(env, &mtxmgr->reginfo, 0);
+err:	(void)__mutex_region_detach(env, mtxmgr);
+	return (ret);
+}
 
-	__os_free(env, mtxmgr);
+/*
+ * __mutex_region_detach --
+ *
+ * PUBLIC: int __mutex_region_detach __P((ENV *, DB_MUTEXMGR *));
+ */
+int
+__mutex_region_detach(env, mtxmgr)
+	ENV *env;
+	DB_MUTEXMGR *mtxmgr;
+{
+	int ret;
+
+	ret = 0;
+	if (mtxmgr != NULL) {
+		if (mtxmgr->reginfo.addr != NULL)
+			ret = __env_region_detach(env, &mtxmgr->reginfo, 0);
+		__os_free(env, mtxmgr);
+		env->mutex_handle = NULL;
+	}
 	return (ret);
 }
 
@@ -136,7 +160,6 @@ __mutex_region_init(env, mtxmgr)
 	DB_MUTEXMGR *mtxmgr;
 {
 	DB_ENV *dbenv;
-	DB_MUTEX *mutexp;
 	DB_MUTEXREGION *mtxregion;
 	db_mutex_t mutex;
 	int ret;
@@ -144,8 +167,6 @@ __mutex_region_init(env, mtxmgr)
 
 	dbenv = env->dbenv;
 
-	COMPQUIET(mutexp, NULL);
-
 	if ((ret = __env_alloc(&mtxmgr->reginfo,
 	    sizeof(DB_MUTEXREGION), &mtxmgr->reginfo.primary)) != 0) {
 		__db_errx(env, DB_STR("2013",
@@ -205,26 +226,11 @@ __mutex_region_init(env, mtxmgr)
 	 * in each link.
 	 */
 	env->mutex_handle = mtxmgr;
-	if (F_ISSET(env, ENV_PRIVATE)) {
-		mutexp = (DB_MUTEX *)mutex_array;
-		mutexp++;
-		mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
-		mtxregion->mutex_next = (db_mutex_t)mutexp;
-	} else {
-		mtxregion->mutex_next = 1;
-		mutexp = MUTEXP_SET(env, 1);
-	}
-	for (mutex = 1; mutex < mtxregion->stat.st_mutex_cnt; ++mutex) {
-		mutexp->flags = 0;
-		if (F_ISSET(env, ENV_PRIVATE))
-			mutexp->mutex_next_link = (db_mutex_t)(mutexp + 1);
-		else
-			mutexp->mutex_next_link = mutex + 1;
-		mutexp++;
-		mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
-	}
-	mutexp->flags = 0;
-	mutexp->mutex_next_link = MUTEX_INVALID;
+	mtxregion->mutex_next = (F_ISSET(env, ENV_PRIVATE) ?
+	    ((uintptr_t)mutex_array + mtxregion->mutex_size) : 1);
+	MUTEX_BULK_INIT(env,
+	    mtxregion, mtxregion->mutex_next, mtxregion->stat.st_mutex_cnt);
+
 	mtxregion->stat.st_mutex_free = mtxregion->stat.st_mutex_cnt;
 	mtxregion->stat.st_mutex_inuse = mtxregion->stat.st_mutex_inuse_max = 0;
 	if ((ret = __mutex_alloc(env, MTX_MUTEX_REGION, 0, &mutex)) != 0)
diff --git a/src/mutex/mut_stat.c b/src/mutex/mut_stat.c
index b64207fa..af622c7d 100644
--- a/src/mutex/mut_stat.c
+++ b/src/mutex/mut_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -19,6 +19,17 @@ static int __mutex_print_stats __P((ENV *, u_int32_t));
 static void __mutex_print_summary __P((ENV *));
 static int __mutex_stat __P((ENV *, DB_MUTEX_STAT **, u_int32_t));
 
+static const FN MutexFlagNames[] = {
+	{ DB_MUTEX_ALLOCATED,		"alloc" },
+	{ DB_MUTEX_LOCKED,		"locked" },
+	{ DB_MUTEX_LOGICAL_LOCK,	"logical" },
+	{ DB_MUTEX_OWNER_DEAD,		"ower-dead" },
+	{ DB_MUTEX_PROCESS_ONLY,	"process-private" },
+	{ DB_MUTEX_SELF_BLOCK,		"self-block" },
+	{ DB_MUTEX_SHARED,		"shared" },
+	{ 0,				NULL }
+};
+
 /*
  * __mutex_stat_pp --
  *	ENV->mutex_stat pre/post processing.
@@ -170,11 +181,12 @@ __mutex_print_summary(env)
 	size = 0;
 
 	if (F_ISSET(env, ENV_PRIVATE)) {
-		mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1;
+		mutexp = (DB_MUTEX *)((uintptr_t)mtxmgr->mutex_array +
+		    mtxregion->mutex_size);
 		chunk = NULL;
 		size = __env_elem_size(env,
 		    ROFF_TO_P(mtxregion->mutex_off_alloc));
-		size -= sizeof(*mutexp);
+		size -= mtxregion->mutex_size;
 	} else
 		mutexp = MUTEXP_SET(env, 1);
 	for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) {
@@ -185,13 +197,15 @@ __mutex_print_summary(env)
 		else
 			counts[mutexp->alloc_id]++;
 
-		mutexp++;
+		mutexp = (DB_MUTEX *)((uintptr_t)mutexp +
+		    mtxregion->mutex_size);
 		if (F_ISSET(env, ENV_PRIVATE) &&
 		    (size -= sizeof(*mutexp)) < sizeof(*mutexp)) {
 			mutexp =
 			    __env_get_chunk(&mtxmgr->reginfo, &chunk, &size);
+			mutexp = ALIGNP_INC(mutexp,
+			    mtxregion->stat.st_mutex_align);
 		}
-		mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
 	}
 	__db_msg(env, "Mutex counts");
 	__db_msg(env, "%d\tUnallocated", counts[0]);
@@ -252,14 +266,6 @@ __mutex_print_all(env, flags)
 	ENV *env;
 	u_int32_t flags;
 {
-	static const FN fn[] = {
-		{ DB_MUTEX_ALLOCATED,		"alloc" },
-		{ DB_MUTEX_LOCKED,		"locked" },
-		{ DB_MUTEX_LOGICAL_LOCK,	"logical" },
-		{ DB_MUTEX_PROCESS_ONLY,	"process-private" },
-		{ DB_MUTEX_SELF_BLOCK,		"self-block" },
-		{ 0,				NULL }
-	};
 	DB_MSGBUF mb, *mbp;
 	DB_MUTEX *mutexp;
 	DB_MUTEXMGR *mtxmgr;
@@ -294,37 +300,32 @@ __mutex_print_all(env, flags)
 	__db_msg(env, "mutex\twait/nowait, pct wait, holder, flags");
 	size = 0;
 	if (F_ISSET(env, ENV_PRIVATE)) {
-		mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1;
+		mutexp = (DB_MUTEX *)((uintptr_t)mtxmgr->mutex_array +
+		    mtxregion->mutex_size);
 		chunk = NULL;
 		size = __env_elem_size(env,
 		    ROFF_TO_P(mtxregion->mutex_off_alloc));
-		size -= sizeof(*mutexp);
+		size -= mtxregion->mutex_size;
 	} else
 		mutexp = MUTEXP_SET(env, 1);
 	for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) {
 		if (F_ISSET(mutexp, DB_MUTEX_ALLOCATED)) {
 			__db_msgadd(env, mbp, "%5lu\t", (u_long)i);
-
 			__mutex_print_debug_stats(env, mbp,
 			    F_ISSET(env, ENV_PRIVATE) ?
 			    (db_mutex_t)mutexp : i, flags);
-
-			if (mutexp->alloc_id != 0)
-				__db_msgadd(env, mbp,
-				    ", %s", __mutex_print_id(mutexp->alloc_id));
-
-			__db_prflags(env, mbp, mutexp->flags, fn, " (", ")");
-
 			DB_MSGBUF_FLUSH(env, mbp);
 		}
 
-		mutexp++;
+		mutexp = (DB_MUTEX *)((uintptr_t)mutexp +
+		    mtxregion->mutex_size);
 		if (F_ISSET(env, ENV_PRIVATE) &&
-		    (size -= sizeof(*mutexp)) < sizeof(*mutexp)) {
+		    (size -= mtxregion->mutex_size) < mtxregion->mutex_size) {
 			mutexp =
 			    __env_get_chunk(&mtxmgr->reginfo, &chunk, &size);
+			mutexp = ALIGNP_INC(mutexp,
+			    mtxregion->stat.st_mutex_align);
 		}
-		mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
 	}
 
 	return (0);
@@ -332,8 +333,7 @@ __mutex_print_all(env, flags)
 
 /*
  * __mutex_print_debug_single --
- *	Print mutex internal debugging statistics for a single mutex on a
- *	single output line.
+ *	Print mutex internal debugging statistics for a single mutex.
  *
  * PUBLIC: void __mutex_print_debug_single
  * PUBLIC:          __P((ENV *, const char *, db_mutex_t, u_int32_t));
@@ -359,8 +359,9 @@ __mutex_print_debug_single(env, tag, mutex, flags)
 
 /*
  * __mutex_print_debug_stats --
- *	Print mutex internal debugging statistics, that is, the statistics
- *	in the [] square brackets.
+ *	Print the mutex internal debugging statistics in square brackets,
+ *	followed by the allocation id and flags, on a single line. When
+ *	MUTEX_DIAG is on and the mutex is held, append the owner's stack trace.
  *
  * PUBLIC: void __mutex_print_debug_stats
  * PUBLIC:          __P((ENV *, DB_MSGBUF *, db_mutex_t, u_int32_t));
@@ -380,6 +381,9 @@ __mutex_print_debug_stats(env, mbp, mutex, flags)
     !defined(HAVE_MUTEX_PTHREADS))
 	int sharecount;
 #endif
+#ifdef MUTEX_DIAG
+	char timestr[CTIME_BUFLEN];
+#endif
 
 	if (mutex == MUTEX_INVALID) {
 		__db_msgadd(env, mbp, "[!Set]");
@@ -448,6 +452,22 @@ __mutex_print_debug_stats(env, mbp, mutex, flags)
 		    mutexp->hybrid_wait, mutexp->hybrid_wakeup);
 #endif
 
+	if (mutexp->alloc_id != 0)
+		__db_msgadd(env,
+		    mbp, ", %s", __mutex_print_id(mutexp->alloc_id));
+
+	__db_prflags(env, mbp, mutexp->flags, MutexFlagNames, " (", ")");
+#ifdef MUTEX_DIAG
+	if (mutexp->alloc_id != MTX_LOGICAL_LOCK &&
+	    timespecisset(&mutexp->mutex_history.when)) {
+		__db_ctimespec(&mutexp->mutex_history.when, timestr);
+		__db_msgadd(env, mbp, "\nLocked %s", timestr);
+		if (mutexp->mutex_history.stacktext[0] != '\0')
+			__db_msgadd(env, mbp, "\n%.*s",
+			    (int)sizeof(mutexp->mutex_history.stacktext) - 1,
+			    mutexp->mutex_history.stacktext);
+	}
+#endif
 	if (LF_ISSET(DB_STAT_CLEAR))
 		__mutex_clear(env, mutex);
 }
@@ -495,7 +515,8 @@ __mutex_print_id(alloc_id)
 	case MTX_TXN_COMMIT:		return ("txn commit");
 	case MTX_TXN_MVCC:		return ("txn mvcc");
 	case MTX_TXN_REGION:		return ("txn region");
-	default:			return ("unknown mutex type");
+	case 0:				return ("invalid 0 mutex type");
+	default:			return ("unknown non-zero mutex type");
 	/* NOTREACHED */
 	}
 }
@@ -577,3 +598,39 @@ __mutex_stat_print_pp(dbenv, flags)
 	return (__db_stat_not_built(dbenv->env));
 }
 #endif
+
+/*
+ * __mutex_describe
+ *	Fill in a buffer with the mutex #, alloc_id, and any other
+ *	characteristics which are likely to be useful for diagnostics. The
+ *	destination buffer must hold at least DB_MUTEX_DESCRIBE_STRLEN bytes.
+ *
+ * PUBLIC: char *__mutex_describe __P((ENV *, db_mutex_t, char *));
+ */
+char *
+__mutex_describe(env, mutex, dest)
+	ENV *env;
+	db_mutex_t mutex;
+	char *dest;
+{
+	DB_MUTEX *mutexp;
+	DB_MSGBUF mb, *mbp;
+	const char *type;
+
+	DB_MSGBUF_INIT(&mb);
+	mbp = &mb;
+	mutexp = MUTEXP_SET(env, mutex);
+	type = F_ISSET(mutexp, DB_MUTEX_SHARED) ? "latch" : "mutex";
+#ifdef HAVE_STATISTICS
+	__db_msgadd(env, mbp, "%s %s id %ld ",
+	    __mutex_print_id(mutexp->alloc_id), type, (long)mutex);
+	__db_prflags(env, mbp, mutexp->flags, MutexFlagNames, " (", ")");
+#else
+	__db_msgadd(env, mbp, "%s flags %x id %ld ",
+	    type, mutexp->flags, (long)mutex);
+#endif
+	(void)snprintf(dest, DB_MUTEX_DESCRIBE_STRLEN - 1,
+	    "%.*s", (int)(mbp->cur - mbp->buf), mbp->buf);
+	dest[DB_MUTEX_DESCRIBE_STRLEN - 1] = '\0';
+	return (dest);
+}
diff --git a/src/mutex/mut_stub.c b/src/mutex/mut_stub.c
index 61ecc80c..0ece9a9d 100644
--- a/src/mutex/mut_stub.c
+++ b/src/mutex/mut_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -157,6 +157,16 @@ __mutex_print_debug_stats(env, mbp, mutex, flags)
 }
 
 int
+__mutex_refresh(env, mutex)
+	ENV *env;
+	db_mutex_t mutex;
+{
+	COMPQUIET(env, NULL);
+	COMPQUIET(mutex, MUTEX_INVALID);
+	return (0);
+}
+
+int
 __mutex_set_align(dbenv, align)
 	DB_ENV *dbenv;
 	u_int32_t align;
diff --git a/src/mutex/mut_tas.c b/src/mutex/mut_tas.c
index 0899d237..c7cc3ea5 100644
--- a/src/mutex/mut_tas.c
+++ b/src/mutex/mut_tas.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -52,8 +52,7 @@ __db_tas_mutex_init(env, mutex, flags)
 #endif
 	if (MUTEX_INIT(&mutexp->tas)) {
 		ret = __os_get_syserr();
-		__db_syserr(env, ret, DB_STR("2029",
-		    "TAS: mutex initialize"));
+		__db_syserr(env, ret, DB_STR("2029", "TAS: mutex initialize"));
 		return (__os_posix_err(ret));
 	}
 #ifdef HAVE_MUTEX_HYBRID
@@ -66,7 +65,9 @@ __db_tas_mutex_init(env, mutex, flags)
 
 /*
  * __db_tas_mutex_lock_int
- *     Internal function to lock a mutex, or just try to lock it without waiting
+ *	Internal function to lock a mutex, or just try to lock it without
+ *	waiting. MUTEX_WAIT() passes in a timeout to allow an early exit
+ *	returning DB_TIMEOUT.
  */
 inline static int
 __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
@@ -80,13 +81,15 @@ __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
 	DB_MUTEXMGR *mtxmgr;
 	DB_MUTEXREGION *mtxregion;
 	DB_THREAD_INFO *ip;
-	db_timespec now, timespec;
+	db_timespec now, timeout_timespec;
 	u_int32_t nspins;
+	u_long micros;
 	int ret;
-#ifdef HAVE_MUTEX_HYBRID
-	const u_long micros = 0;
-#else
-	u_long micros, max_micros;
+#ifdef DIAGNOSTIC
+	char buf[DB_THREADID_STRLEN];
+#endif
+#ifndef HAVE_MUTEX_HYBRID
+	u_long max_micros;
 	db_timeout_t time_left;
 #endif
 
@@ -95,21 +98,23 @@ __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
 	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
 		return (0);
 
+	PANIC_CHECK(env);
+
 	mtxmgr = env->mutex_handle;
 	mtxregion = mtxmgr->reginfo.primary;
 	mutexp = MUTEXP_SET(env, mutex);
 
 	CHECK_MTX_THREAD(env, mutexp);
 
-#ifdef HAVE_STATISTICS
 	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
 		STAT_INC(env, mutex, set_wait, mutexp->mutex_set_wait, mutex);
 	else
 		STAT_INC(env,
 		    mutex, set_nowait, mutexp->mutex_set_nowait, mutex);
-#endif
 
-#ifndef HAVE_MUTEX_HYBRID
+#ifdef HAVE_MUTEX_HYBRID
+	micros = 0;
+#else
 	/*
 	 * Wait 1ms initially, up to 10ms for mutexes backing logical database
 	 * locks, and up to 25 ms for mutual exclusion data structure mutexes.
@@ -119,16 +124,15 @@ __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
 	max_micros = F_ISSET(mutexp, DB_MUTEX_LOGICAL_LOCK) ? 10000 : 25000;
 #endif
 
-	/* Clear the ending timespec so it'll be initialed upon first need. */
+	/* Clear the ending timespec so it'll be initialized upon first need. */
 	if (timeout != 0)
-		timespecclear(&timespec);
+		timespecclear(&timeout_timespec);
 
 	 /*
-	 * Only check the thread state once, by initializing the thread
-	 * control block pointer to null.  If it is not the failchk
-	 * thread, then ip will have a valid value subsequent times
-	 * in the loop.
-	 */
+	  * Only check the thread state once, by initializing the thread
+	  * control block pointer to null.  If it is not the failchk thread,
+	  * then ip will be valid during the subsequent times in the loop.
+	  */
 	ip = NULL;
 
 loop:	/* Attempt to acquire the resource for N spins. */
@@ -151,16 +155,45 @@ loop:	/* Attempt to acquire the resource for N spins. */
 			if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
 			    ip == NULL && dbenv->is_alive(dbenv,
 			    mutexp->pid, mutexp->tid, 0) == 0) {
+				/*
+				 * The process owning the mutex is "dead" now, but it may
+				 * have already released the mutex. We need to check again,
+				 * by going back to the top of the loop, whether the mutex
+				 * is still held by the "dead" process. We yield 10 us to
+				 * increase the likelihood of mutexp fields being up-to-date.
+				 * Set nspins so we spin only one more time, because there
+				 * is no need to spin more if the dead process owns the mutex.
+				 */
+				if (nspins > 1) {
+					nspins = 2;
+					__os_yield(env, 0, 10);
+					continue;
+				}
 				ret = __env_set_state(env, &ip, THREAD_VERIFY);
 				if (ret != 0 ||
-				    ip->dbth_state == THREAD_FAILCHK)
-					return (DB_RUNRECOVERY);
+				    ip->dbth_state == THREAD_FAILCHK) {
+					/*
+					 * Either we could not get the thread
+					 * state or we did and found that this
+					 * is the failchk thread. Return a panic
+					 * code in either case, but if this is
+					 * the failchk thread, don't give more
+					 * notice of the already-existing panic.
+					 */
+				    	if (ret == 0)
+						return (USR_ERR(env,
+						    DB_RUNRECOVERY));
+					else
+						return (__env_panic(env,
+							USR_ERR(env, ret)));
+				}
 			}
 			if (nowait)
-				return (DB_LOCK_NOTGRANTED);
+				return (USR_ERR(env, DB_LOCK_NOTGRANTED));
 			/*
 			 * Some systems (notably those with newer Intel CPUs)
 			 * need a small pause here. [#6975]
+			 * XXX Is there something better post-Pentium 4?
 			 */
 			MUTEX_PAUSE
 			continue;
@@ -189,9 +222,14 @@ loop:	/* Attempt to acquire the resource for N spins. */
 		 * the DB mutex unlock function.
 		 */
 #endif
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD)) {
+			MUTEX_UNSET(&mutexp->tas);
+			return (__mutex_died(env, mutex));
+		}
+#endif
 #ifdef DIAGNOSTIC
 		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
-			char buf[DB_THREADID_STRLEN];
 			__db_errx(env, DB_STR_A("2030",
 		    "TAS lock failed: lock %ld currently in use: ID: %s",
 			    "%ld %s"), (long)mutex,
@@ -202,6 +240,12 @@ loop:	/* Attempt to acquire the resource for N spins. */
 #endif
 		F_SET(mutexp, DB_MUTEX_LOCKED);
 		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
+#if defined(MUTEX_DIAG)
+		__os_gettime(env, &mutexp->mutex_history.when, 0);
+		/* Why 3? Skip __os_stack_text, __db_tas_mutex_lock{_int,} */
+		__os_stack_text(env, mutexp->mutex_history.stacktext,
+		    sizeof(mutexp->mutex_history.stacktext), 12, 3);
+#endif
 
 #ifdef DIAGNOSTIC
 		/*
@@ -215,20 +259,20 @@ loop:	/* Attempt to acquire the resource for N spins. */
 	}
 
 	/*
-	 * We need to wait for the lock to become available.
-	 * Possibly setup timeouts if this is the first wait, or
-	 * check expiration times for the second and subsequent waits.
+	 * We need to wait for the lock to become available.  Setup timeouts if
+	 * this is the first wait, or the failchk timeout is smaller than the
+	 * wait timeout. Check expiration times for subsequent waits.
 	 */
 	if (timeout != 0) {
 		/* Set the expiration time if this is the first sleep . */
-		if (!timespecisset(&timespec))
-			__clock_set_expires(env, &timespec, timeout);
+		if (!timespecisset(&timeout_timespec))
+			__clock_set_expires(env, &timeout_timespec, timeout);
 		else {
 			timespecclear(&now);
-			if (__clock_expired(env, &now, &timespec))
-				return (DB_TIMEOUT);
+			if (__clock_expired(env, &now, &timeout_timespec))
+				return (USR_ERR(env, DB_TIMEOUT));
 #ifndef HAVE_MUTEX_HYBRID
-			timespecsub(&now, &timespec);
+			timespecsub(&now, &timeout_timespec);
 			DB_TIMESPEC_TO_TIMEOUT(time_left, &now, 0);
 			time_left = timeout - time_left;
 			if (micros > time_left)
@@ -253,13 +297,21 @@ loop:	/* Attempt to acquire the resource for N spins. */
 		goto loop;
 	/* Wait until the mutex can be obtained exclusively or it times out. */
 	if ((ret = __db_hybrid_mutex_suspend(env,
-	    mutex, timeout == 0 ? NULL : &timespec, TRUE)) != 0)
+	    mutex, timeout == 0 ? NULL : &timeout_timespec, TRUE)) != 0) {
+		DB_DEBUG_MSG(env,
+		    "mutex_lock %ld suspend returned %d", (u_long)mutex, ret);
 		return (ret);
+	}
 #else
 	if ((micros <<= 1) > max_micros)
 		micros = max_micros;
 #endif
 
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+	    dbenv->mutex_failchk_timeout != 0)
+		return (__mutex_died(env, mutex));
+#endif
 	/*
 	 * We're spinning.  The environment might be hung, and somebody else
 	 * has already recovered it.  The first thing recovery does is panic
@@ -291,7 +343,7 @@ __db_tas_mutex_lock(env, mutex, timeout)
  *	Try to exclusively lock a mutex without ever blocking - ever!
  *
  *	Returns 0 on success,
- *		DB_LOCK_NOTGRANTED on timeout
+ *		DB_LOCK_NOTGRANTED if it is busy.
  *		Possibly DB_RUNRECOVERY if DB_ENV_FAILCHK or panic.
  *
  *	This will work for DB_MUTEX_SHARED, though it always tries
@@ -324,9 +376,9 @@ __db_tas_mutex_readlock_int(env, mutex, nowait)
 	DB_MUTEXMGR *mtxmgr;
 	DB_MUTEXREGION *mtxregion;
 	DB_THREAD_INFO *ip;
-	int lock;
+	MUTEX_STATE *state;
+	int lock, ret;
 	u_int32_t nspins;
-	int ret;
 #ifndef HAVE_MUTEX_HYBRID
 	u_long micros, max_micros;
 #endif
@@ -342,14 +394,17 @@ __db_tas_mutex_readlock_int(env, mutex, nowait)
 	CHECK_MTX_THREAD(env, mutexp);
 
 	DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED));
-#ifdef HAVE_STATISTICS
 	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
 		STAT_INC(env,
 		    mutex, set_rd_wait, mutexp->mutex_set_rd_wait, mutex);
 	else
 		STAT_INC(env,
 		    mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex);
-#endif
+
+	state = NULL;
+	if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
+	    mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
+		return (ret);
 
 #ifndef HAVE_MUTEX_HYBRID
 	/*
@@ -375,25 +430,52 @@ loop:	/* Attempt to acquire the resource for N spins. */
 			MUTEX_PAUSE
 			continue;
 		}
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+		    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+			(void)atomic_compare_exchange(env,
+			    &mutexp->sharecount, lock, lock - 1);
+			if (state != NULL)
+				state->action = MUTEX_ACTION_UNLOCKED;
+		       return (__mutex_died(env, mutex));
+	       }
+#endif
 
 		MEMBAR_ENTER();
+#ifdef MUTEX_DIAG
+		__os_gettime(env, &mutexp->mutex_history.when, 0);
+		__os_stack_text(env, mutexp->mutex_history.stacktext,
+		    sizeof(mutexp->mutex_history.stacktext), 12, 3);
+#endif
 		/* For shared latches the threadid is the last requestor's id.
 		 */
 		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
+		if (state != NULL)
+			state->action = MUTEX_ACTION_SHARED;
 
 		return (0);
 	}
 
-	/*
-	 * Waiting for the latched must be avoided when it could allow a
-	 * 'failchk'ing thread to hang.
-	 */
+	/* Waiting for the latch must be avoided if it could hang up failchk. */
 	if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
 	    dbenv->is_alive(dbenv, mutexp->pid, mutexp->tid, 0) == 0) {
 		ret = __env_set_state(env, &ip, THREAD_VERIFY);
-		if (ret != 0 || ip->dbth_state == THREAD_FAILCHK)
-			return (DB_RUNRECOVERY);
+		if (ret != 0 || ip->dbth_state == THREAD_FAILCHK) {
+			if (state != NULL)
+				state->action = MUTEX_ACTION_UNLOCKED;
+			if (ret == 0)
+				return (USR_ERR(env, DB_RUNRECOVERY));
+			else
+				return (__env_panic(env, USR_ERR(env, ret)));
+		}
 	}
+#ifdef HAVE_FAILCHK_BROADCAST
+       if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD)) {
+	       if (state != NULL)
+		       state->action = MUTEX_ACTION_UNLOCKED;
+	       return (__mutex_died(env, mutex));
+       }
+#endif
 
 	/*
 	 * It is possible to spin out when the latch is just shared, due to
@@ -403,6 +485,8 @@ loop:	/* Attempt to acquire the resource for N spins. */
 	if (nowait) {
 		if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
 			goto loop;
+		if (state != NULL)
+			state->action = MUTEX_ACTION_UNLOCKED;
 		return (DB_LOCK_NOTGRANTED);
 	}
 
@@ -419,8 +503,11 @@ loop:	/* Attempt to acquire the resource for N spins. */
 	if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
 		goto loop;
 	/* Wait until the mutex is no longer exclusively locked. */
-	if ((ret = __db_hybrid_mutex_suspend(env, mutex, NULL, FALSE)) != 0)
+	if ((ret = __db_hybrid_mutex_suspend(env, mutex, NULL, FALSE)) != 0) {
+		if (state != NULL)
+			state->action = MUTEX_ACTION_UNLOCKED;
 		return (ret);
+	}
 #else
 	PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);
 	__os_yield(env, 0, micros);
@@ -486,17 +573,13 @@ __db_tas_mutex_tryreadlock(env, mutex)
  */
 int
 __db_tas_mutex_unlock(env, mutex)
-    ENV *env;
+	ENV *env;
 	db_mutex_t mutex;
 {
 	DB_ENV *dbenv;
 	DB_MUTEX *mutexp;
-#ifdef HAVE_MUTEX_HYBRID
 	int ret;
-#ifdef MUTEX_DIAG
-	int waiters;
-#endif
-#endif
+	char description[DB_MUTEX_DESCRIBE_STRLEN];
 #ifdef HAVE_SHARED_LATCHES
 	int sharecount;
 #endif
@@ -506,14 +589,14 @@ __db_tas_mutex_unlock(env, mutex)
 		return (0);
 
 	mutexp = MUTEXP_SET(env, mutex);
-#if defined(HAVE_MUTEX_HYBRID) && defined(MUTEX_DIAG)
-	waiters = mutexp->wait;
-#endif
 
 #if defined(DIAGNOSTIC)
 #if defined(HAVE_SHARED_LATCHES)
 	if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
 		if (atomic_read(&mutexp->sharecount) == 0) {
+			if (PANIC_ISSET(env))
+				return (__env_panic(env, 
+				    USR_ERR(env, DB_RUNRECOVERY)));
 			__db_errx(env, DB_STR_A("2031",
 			    "shared unlock %ld already unlocked", "%ld"),
 			    (long)mutex);
@@ -522,16 +605,39 @@ __db_tas_mutex_unlock(env, mutex)
 	} else
 #endif
 	if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
+		if (PANIC_ISSET(env))
+			return (__env_panic(env, 
+			    USR_ERR(env, DB_RUNRECOVERY)));
 		__db_errx(env, DB_STR_A("2032",
 		    "unlock %ld already unlocked", "%ld"), (long)mutex);
 		return (__env_panic(env, EACCES));
 	}
 #endif
+#ifdef MUTEX_DIAG
+	timespecclear(&mutexp->mutex_history.when);
+#endif
 
 #ifdef HAVE_SHARED_LATCHES
 	if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
 		sharecount = atomic_read(&mutexp->sharecount);
-		/*MUTEX_MEMBAR(mutexp->sharecount);*/		/* XXX why? */
+		/*
+		 * Many code paths contain sequences of the form
+		 *	MUTEX_LOCK(); ret = function(); MUTEX_UNLOCK();
+		 * If function() sees or causes a panic while it had temporarily
+		 * unlocked the mutex it won't be locked anymore. Don't confuse
+		 * the error by generating spurious follow-on messages.
+		 */
+		if (sharecount == 0) {
+was_not_locked:
+			if (!PANIC_ISSET(env)) {
+				__db_errx(env, DB_STR_A("2070",
+				    "Shared unlock %s: already unlocked", "%s"),
+				    __mutex_describe(env, mutex, description));
+				return (__env_panic(env, 
+				    USR_ERR(env, DB_RUNRECOVERY)));
+			}
+			return (__env_panic(env, EACCES));
+		    }
 		if (sharecount == MUTEX_SHARE_ISEXCLUSIVE) {
 			F_CLR(mutexp, DB_MUTEX_LOCKED);
 			/* Flush flag update before zeroing count */
@@ -542,12 +648,17 @@ __db_tas_mutex_unlock(env, mutex)
 			MEMBAR_EXIT();
 			sharecount = atomic_dec(env, &mutexp->sharecount);
 			DB_ASSERT(env, sharecount >= 0);
+			if (env->thr_hashtab != NULL &&
+			    (ret = __mutex_record_unlock(env, mutex)) != 0)
+				return (ret);
 			if (sharecount > 0)
 				return (0);
 		}
 	} else
 #endif
 	{
+		if (!F_ISSET(mutexp, DB_MUTEX_LOCKED))
+			goto was_not_locked;
 		F_CLR(mutexp, DB_MUTEX_LOCKED);
 		MUTEX_UNSET(&mutexp->tas);
 	}
@@ -559,17 +670,10 @@ __db_tas_mutex_unlock(env, mutex)
 #endif
 
 	/* Prevent the load of wait from being hoisted before MUTEX_UNSET */
-	MUTEX_MEMBAR(mutexp->flags);
+	(void)MUTEX_MEMBAR(mutexp->flags);
 	if (mutexp->wait &&
 	    (ret = __db_pthread_mutex_unlock(env, mutex)) != 0)
 		    return (ret);
-
-#ifdef MUTEX_DIAG
-	if (mutexp->wait)
-		printf("tas_unlock %ld %x waiters! busy %x waiters %d/%d\n",
-		    mutex, pthread_self(),
-		    MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait);
-#endif
 #endif
 
 	return (0);
diff --git a/src/mutex/mut_win32.c b/src/mutex/mut_win32.c
index 07d5a8dd..270e03fb 100644
--- a/src/mutex/mut_win32.c
+++ b/src/mutex/mut_win32.c
@@ -1,7 +1,7 @@
 /*
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2002, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2002, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -92,6 +92,9 @@ __db_win32_mutex_lock_int(env, mutex, timeout, wait)
 	db_timespec now, tempspec, timeoutspec;
 	db_timeout_t time_left;
 	int ret;
+#ifdef DIAGNOSTIC
+	char buf[DB_THREADID_STRLEN];
+#endif
 #ifdef MUTEX_DIAG
 	LARGE_INTEGER now;
 #endif
@@ -143,8 +146,10 @@ loop:	/* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
 			    mutexp->pid, mutexp->tid, 0) == 0) {
 				ret = __env_set_state(env, &ip, THREAD_VERIFY);
 				if (ret != 0 ||
-				    ip->dbth_state == THREAD_FAILCHK)
-					return (DB_RUNRECOVERY);
+				    ip->dbth_state == THREAD_FAILCHK) {
+					ret = DB_RUNRECOVERY;
+					goto failed;
+				}
 			}
 			if (!wait)
 				return (DB_LOCK_NOTGRANTED);
@@ -155,15 +160,20 @@ loop:	/* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
 			MUTEX_PAUSE
 			continue;
 		}
-
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD)) {
+			MUTEX_UNSET(&mutexp->tas);
+			goto died;
+		}
+#endif
 #ifdef DIAGNOSTIC
 		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
-			char buf[DB_THREADID_STRLEN];
 			__db_errx(env, DB_STR_A("2003",
 			    "Win32 lock failed: mutex already locked by %s",
 			    "%s"), dbenv->thread_id_string(dbenv,
 			    mutexp->pid, mutexp->tid, buf));
-			return (__env_panic(env, EACCES));
+			ret = __env_panic(env, EACCES);
+			goto failed;
 		}
 #endif
 		F_SET(mutexp, DB_MUTEX_LOCKED);
@@ -179,11 +189,12 @@ loop:	/* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
 			CloseHandle(event);
 			InterlockedDecrement(&mutexp->nwaiters);
 #ifdef MUTEX_DIAG
+			/* "ret" was set by WaitForSingleObject(). */
 			if (ret != WAIT_OBJECT_0) {
 				QueryPerformanceCounter(&diag_now);
 				printf(DB_STR_A("2004",
-				    "[%I64d]: Lost signal on mutex %p, "
-				    "id %d, ms %d\n", "%I64d %p %d %d"),
+				    "[%lld]: Lost signal on mutex %p, "
+				    "id %d, ms %d\n", "%lld %p %d %d"),
 				    diag_now.QuadPart, mutexp, mutexp->id, ms);
 			}
 #endif
@@ -210,11 +221,8 @@ loop:	/* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
 	if (timeout != 0) {
 		timespecclear(&now);
 		if (__clock_expired(env, &now, &timeoutspec)) {
-			if (event != NULL) {
-				CloseHandle(event);
-				InterlockedDecrement(&mutexp->nwaiters);
-			}
-			return (DB_TIMEOUT);
+			ret = DB_TIMEOUT;
+			goto failed;
 		}
 		/* Reduce the event wait if the timeout would happen first. */
 		tempspec = timeoutspec;
@@ -228,24 +236,41 @@ loop:	/* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
 #ifdef MUTEX_DIAG
 		QueryPerformanceCounter(&diag_now);
 		printf(DB_STR_A("2005",
-		    "[%I64d]: Waiting on mutex %p, id %d\n",
-		    "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
+		    "[%lld]: Waiting on mutex %p, id %d\n",
+		    "%lld %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
 #endif
 		InterlockedIncrement(&mutexp->nwaiters);
-		if ((ret = get_handle(env, mutexp, &event)) != 0)
-			goto err;
+		if ((ret = get_handle(env, mutexp, &event)) != 0) {
+			InterlockedDecrement(&mutexp->nwaiters);
+			goto syserr;
+		}
 	}
 	if ((ret = WaitForSingleObject(event, ms)) == WAIT_FAILED) {
 		ret = __os_get_syserr();
-		goto err;
+		goto syserr;
 	}
 	if ((ms <<= 1) > MS_PER_SEC)
 		ms = MS_PER_SEC;
 
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+	    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+died:
+		ret = __mutex_died(env, mutex);
+		goto failed;
+	}
+#endif
 	PANIC_CHECK(env);
 	goto loop;
 
-err:	__db_syserr(env, ret, DB_STR("2006", "Win32 lock failed"));
+failed:
+	if (event != NULL) {
+		CloseHandle(event);
+		InterlockedDecrement(&mutexp->nwaiters);
+	}
+	return (ret);
+
+syserr:	__db_syserr(env, ret, DB_STR("2006", "Win32 lock failed"));
 	return (__env_panic(env, __os_posix_err(ret)));
 }
 
@@ -266,6 +291,12 @@ __db_win32_mutex_init(env, mutex, flags)
 	mutexp = MUTEXP_SET(env, mutex);
 	mutexp->id = ((getpid() & 0xffff) << 16) ^ P_TO_UINT32(mutexp);
 	F_SET(mutexp, flags);
+	/*
+	 * See WINCE_ATOMIC_MAGIC definition for details.
+	 * Use sharecount, because the value just needs to be a db_atomic_t
+	 * memory mapped onto the same page as those being Interlocked*.
+	 */
+	WINCE_ATOMIC_MAGIC(&mutexp->sharecount);
 
 	return (0);
 }
@@ -315,9 +346,11 @@ __db_win32_mutex_readlock_int(env, mutex, nowait)
 	DB_MUTEXMGR *mtxmgr;
 	DB_MUTEXREGION *mtxregion;
 	HANDLE event;
+	MUTEX_STATE *state;
 	u_int32_t nspins;
-	int ms, ret;
-	long exch_ret, mtx_val;
+	int max_ms, ms, ret;
+	long mtx_val;
+
 #ifdef MUTEX_DIAG
 	LARGE_INTEGER diag_now;
 #endif
@@ -342,11 +375,23 @@ __db_win32_mutex_readlock_int(env, mutex, nowait)
 	event = NULL;
 	ms = 50;
 	ret = 0;
+
+	state = NULL;
+	if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
+	    mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
+		return (ret);
+#ifdef HAVE_FAILCHK_BROADCAST
 	/*
-	 * This needs to be initialized, since if mutexp->tas
-	 * is write locked on the first pass, it needs a value.
+	 * Limit WaitForSingleObject() sleeps to at most the failchk timeout,
+	 * and at least 1 millisecond. When failchk broadcasting is not
+	 * supported, check at least every second.
 	 */
-	exch_ret = 0;
+	if (dbenv->mutex_failchk_timeout != 0 &&
+	    (max_ms = (dbenv->mutex_failchk_timeout / US_PER_MS)) == 0)
+		max_ms = 1;
+	else
+#endif
+		max_ms = MS_PER_SEC;
 
 loop:	/* Attempt to acquire the resource for N spins. */
 	for (nspins =
@@ -357,9 +402,10 @@ loop:	/* Attempt to acquire the resource for N spins. */
 		 */
 retry:		mtx_val = atomic_read(&mutexp->sharecount);
 		if (mtx_val == MUTEX_SHARE_ISEXCLUSIVE) {
-			if (nowait)
-				return (DB_LOCK_NOTGRANTED);
-
+			if (nowait) {
+				ret = DB_LOCK_NOTGRANTED;
+				goto failed;
+			}
 			continue;
 		} else if (!atomic_compare_exchange(env, &mutexp->sharecount,
 		    mtx_val, mtx_val + 1)) {
@@ -370,6 +416,15 @@ retry:		mtx_val = atomic_read(&mutexp->sharecount);
 			MUTEX_PAUSE
 			goto retry;
 		}
+#ifdef HAVE_FAILCHK_BROADCAST
+		if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+		    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+			InterlockedDecrement(
+			    (interlocked_val)&mutexp->sharecount);
+			ret = __mutex_died(env, mutex);
+			goto failed;
+		}
+#endif
 
 #ifdef HAVE_STATISTICS
 		if (event == NULL)
@@ -384,12 +439,14 @@ retry:		mtx_val = atomic_read(&mutexp->sharecount);
 			if (ret != WAIT_OBJECT_0) {
 				QueryPerformanceCounter(&diag_now);
 				printf(DB_STR_A("2007",
-				    "[%I64d]: Lost signal on mutex %p, "
-				    "id %d, ms %d\n", "%I64d %p %d %d"),
+				    "[%lld]: Lost signal on mutex %p, "
+				    "id %d, ms %d\n", "%lld %p %d %d"),
 				    diag_now.QuadPart, mutexp, mutexp->id, ms);
 			}
 #endif
 		}
+		if (state != NULL)
+			state->action = MUTEX_ACTION_SHARED;
 
 #ifdef DIAGNOSTIC
 		/*
@@ -404,17 +461,17 @@ retry:		mtx_val = atomic_read(&mutexp->sharecount);
 	}
 
 	/*
-	 * Yield the processor; wait 50 ms initially, up to 1 second.  This
-	 * loop is needed to work around a race where the signal from the
-	 * unlocking thread gets lost.  We start at 50 ms because it's unlikely
-	 * to happen often and we want to avoid wasting CPU.
+	 * Yield the processor; wait 50 ms initially, up to 1 second or the
+	 * failchk timeout. This loop works around a race where the signal from
+	 * the unlocking thread gets lost.  We start at 50 ms because it's
+	 * unlikely to happen often and we want to avoid wasting CPU.
 	 */
 	if (event == NULL) {
 #ifdef MUTEX_DIAG
 		QueryPerformanceCounter(&diag_now);
 		printf(DB_STR_A("2008",
-		    "[%I64d]: Waiting on mutex %p, id %d\n",
-		    "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
+		    "[%lld]: Waiting on mutex %p, id %d\n",
+		    "%lld %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
 #endif
 		InterlockedIncrement(&mutexp->nwaiters);
 		if ((ret = get_handle(env, mutexp, &event)) != 0)
@@ -424,12 +481,32 @@ retry:		mtx_val = atomic_read(&mutexp->sharecount);
 		ret = __os_get_syserr();
 		goto err;
 	}
-	if ((ms <<= 1) > MS_PER_SEC)
-		ms = MS_PER_SEC;
+
+#ifdef HAVE_FAILCHK_BROADCAST
+	if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+	    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+		(void)atomic_compare_exchange(env,
+		    &mutexp->sharecount, mtx_val, mtx_val - 1);
+		ret = __mutex_died(env, mutex);
+		goto failed;
+	}
+#endif
 
 	PANIC_CHECK(env);
+
+	if ((ms <<= 1) > max_ms)
+		ms = max_ms;
 	goto loop;
 
+failed:
+	if (event != NULL) {
+		CloseHandle(event);
+		InterlockedDecrement(&mutexp->nwaiters);
+	}
+	if (state != NULL)
+		state->action = MUTEX_ACTION_UNLOCKED;
+	return (ret);
+
 err:	__db_syserr(env, ret, DB_STR("2009",
 	    "Win32 read lock failed"));
 	return (__env_panic(env, __os_posix_err(ret)));
@@ -482,7 +559,8 @@ __db_win32_mutex_unlock(env, mutex)
 	DB_ENV *dbenv;
 	DB_MUTEX *mutexp;
 	HANDLE event;
-	int ret;
+	int ret, sharecount;
+	char description[DB_MUTEX_DESCRIBE_STRLEN];
 #ifdef MUTEX_DIAG
 	LARGE_INTEGER diag_now;
 #endif
@@ -510,6 +588,16 @@ __db_win32_mutex_unlock(env, mutex)
 	 */
 #ifdef HAVE_SHARED_LATCHES
 	if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
+		sharecount = atomic_read(&mutexp->sharecount);
+		if (sharecount == 0) {
+			if (!PANIC_ISSET(env)) {
+				__db_errx(env, DB_STR_A("2071",
+				    "Shared unlock %s: already unlocked", "%s"),
+				    __mutex_describe(env, mutex, description));
+				return (DB_RUNRECOVERY);
+			}
+			return (__env_panic(env, EACCES));
+		}
 		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
 			F_CLR(mutexp, DB_MUTEX_LOCKED);
 			if ((ret = InterlockedExchange(
@@ -519,12 +607,26 @@ __db_win32_mutex_unlock(env, mutex)
 				ret = DB_RUNRECOVERY;
 				goto err;
 			}
-		} else if (InterlockedDecrement(
-		    (interlocked_val)(&atomic_read(&mutexp->sharecount))) > 0)
-			return (0);
+		} else {
+			if (env->thr_hashtab != NULL &&
+			    (ret = __mutex_record_unlock(env, mutex)) != 0)
+			    return (ret);
+			if (InterlockedDecrement((interlocked_val)
+			    (&atomic_read(&mutexp->sharecount))) > 0)
+				return (0);
+		}
 	} else
 #endif
 	{
+		if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
+			if (!PANIC_ISSET(env)) {
+				__db_errx(env, DB_STR_A("2072",
+				    "Unlock %s: already unlocked", "%s"),
+				    __mutex_describe(env, mutex, description));
+				return (DB_RUNRECOVERY);
+			}
+			return (__env_panic(env, EACCES));
+		}
 		F_CLR(mutexp, DB_MUTEX_LOCKED);
 		MUTEX_UNSET(&mutexp->tas);
 	}
@@ -536,8 +638,8 @@ __db_win32_mutex_unlock(env, mutex)
 #ifdef MUTEX_DIAG
 		QueryPerformanceCounter(&diag_now);
 		printf(DB_STR_A("2011",
-		    "[%I64d]: Signalling mutex %p, id %d\n",
-		    "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
+		    "[%lld]: Signalling mutex %p, id %d\n",
+		    "%lld %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
 #endif
 		if (!PulseEvent(event)) {
 			ret = __os_get_syserr();
diff --git a/src/mutex/test_mutex.c b/src/mutex/test_mutex.c
index 24c18016..d6183bdb 100644
--- a/src/mutex/test_mutex.c
+++ b/src/mutex/test_mutex.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * Standalone mutex tester for Berkeley DB mutexes.
  *
@@ -13,7 +13,6 @@
 #include "db_int.h"
 
 #ifdef DB_WIN32
-#define	MUTEX_THREAD_TEST	1
 
 extern int getopt(int, char * const *, const char *);
 
@@ -33,29 +32,13 @@ typedef HANDLE os_thread_t;
 #include <sys/wait.h>
 
 typedef pid_t os_pid_t;
-
-/*
- * There's only one mutex implementation that can't support thread-level
- * locking: UNIX/fcntl mutexes.
- *
- * The general Berkeley DB library configuration doesn't look for the POSIX
- * pthread functions, with one exception -- pthread_yield.
- *
- * Use these two facts to decide if we're going to build with or without
- * threads.
- */
-#if !defined(HAVE_MUTEX_FCNTL) && defined(HAVE_PTHREAD_YIELD)
-#define	MUTEX_THREAD_TEST	1
-
-#include <pthread.h>
-
 typedef pthread_t os_thread_t;
 
 #define	os_thread_create(thrp, attr, func, arg)				\
     pthread_create((thrp), (attr), (func), (arg))
 #define	os_thread_join(thr, statusp) pthread_join((thr), (statusp))
 #define	os_thread_self() pthread_self()
-#endif /* HAVE_PTHREAD_YIELD */
+
 #endif /* !DB_WIN32 */
 
 #define	OS_BAD_PID ((os_pid_t)-1)
@@ -76,28 +59,25 @@ typedef struct {
 	u_int	   wakeme;			/* Request to awake. */
 } TM;
 
-DB_ENV	*dbenv;					/* Backing environment */
+DB_ENV	*dbenv;					/* Backing environment. */
 ENV	*env;
 size_t	 len;					/* Backing data chunk size. */
 
+u_int	alignment = 0;				/* Specify mutex alignment. */
+
 u_int8_t *gm_addr;				/* Global mutex */
 u_int8_t *lm_addr;				/* Locker mutexes */
 u_int8_t *tm_addr;				/* Thread mutexes */
 
-#ifdef MUTEX_THREAD_TEST
 os_thread_t *kidsp;				/* Locker threads */
 os_thread_t  wakep;				/* Wakeup thread */
-#endif
 
 #ifndef	HAVE_MMAP
 u_int	nprocs = 1;				/* -p: Processes. */
 u_int	nthreads = 20;				/* -t: Threads. */
-#elif	MUTEX_THREAD_TEST
+#else
 u_int	nprocs = 5;				/* -p: Processes. */
 u_int	nthreads = 4;				/* -t: Threads. */
-#else
-u_int	nprocs = 20;				/* -p: Processes. */
-u_int	nthreads = 1;				/* -t: Threads. */
 #endif
 
 u_int	maxlocks = 20;				/* -l: Backing locks. */
@@ -147,8 +127,11 @@ main(argc, argv)
 	rtype = PARENT;
 	id = 0;
 	tmpath = argv[0];
-	while ((ch = getopt(argc, argv, "l:n:p:T:t:v")) != EOF)
+	while ((ch = getopt(argc, argv, "a:l:n:p:T:t:v")) != EOF)
 		switch (ch) {
+		case 'a':
+			alignment = (u_int)atoi(optarg);
+			break;
 		case 'l':
 			maxlocks = (u_int)atoi(optarg);
 			break;
@@ -161,14 +144,6 @@ main(argc, argv)
 		case 't':
 			if ((nthreads = (u_int)atoi(optarg)) == 0)
 				nthreads = 1;
-#if !defined(MUTEX_THREAD_TEST)
-			if (nthreads != 1) {
-				fprintf(stderr,
-    "%s: thread support not available or not compiled for this platform.\n",
-				    progname);
-				return (EXIT_FAILURE);
-			}
-#endif
 			break;
 		case 'T':
 			if (!memcmp(optarg, "locker", sizeof("locker") - 1))
@@ -242,7 +217,11 @@ main(argc, argv)
 	 *
 	 * Clean up from any previous runs.
 	 */
+#ifdef DB_WIN32
+	snprintf(cmd, sizeof(cmd), "rmdir /S /Q %s", TESTDIR);
+#else
 	snprintf(cmd, sizeof(cmd), "rm -rf %s", TESTDIR);
+#endif
 	(void)system(cmd);
 	snprintf(cmd, sizeof(cmd), "mkdir %s", TESTDIR);
 	(void)system(cmd);
@@ -292,8 +271,8 @@ main(argc, argv)
 
 		/* Wait for all lockers to exit. */
 		if ((err = os_wait(pids, nprocs)) != 0) {
-			fprintf(stderr, "%s: locker wait failed with %d\n",
-			    progname, err);
+			fprintf(stderr, "%s: locker wait failed with %s\n",
+			    progname, db_strerror(err));
 			goto fail;
 		}
 
@@ -357,7 +336,6 @@ int
 locker_start(id)
 	u_long id;
 {
-#if defined(MUTEX_THREAD_TEST)
 	u_int i;
 	int err;
 
@@ -378,17 +356,13 @@ locker_start(id)
 			return (1);
 		}
 	return (0);
-#else
-	return (run_lthread((void *)id) == NULL ? 0 : 1);
-#endif
 }
 
 int
 locker_wait()
 {
-#if defined(MUTEX_THREAD_TEST)
 	u_int i;
-	void *retp;
+	void *retp = NULL;
 
 	/* Wait for the threads to exit. */
 	for (i = 0; i < nthreads; i++) {
@@ -400,7 +374,6 @@ locker_wait()
 		}
 	}
 	free(kidsp);
-#endif
 	return (0);
 }
 
@@ -414,11 +387,7 @@ run_lthread(arg)
 	int err, i;
 
 	id = (u_long)arg;
-#if defined(MUTEX_THREAD_TEST)
 	tid = (u_long)os_thread_self();
-#else
-	tid = 0;
-#endif
 	printf("Locker: ID %03lu (PID: %lu; TID: %lx)\n",
 	    id, (u_long)getpid(), tid);
 
@@ -534,7 +503,6 @@ int
 wakeup_start(id)
 	u_long id;
 {
-#if defined(MUTEX_THREAD_TEST)
 	int err;
 
 	/*
@@ -547,16 +515,12 @@ wakeup_start(id)
 		return (1);
 	}
 	return (0);
-#else
-	return (run_wthread((void *)id) == NULL ? 0 : 1);
-#endif
 }
 
 int
 wakeup_wait()
 {
-#if defined(MUTEX_THREAD_TEST)
-	void *retp;
+	void *retp = NULL;
 
 	/*
 	 * A file is created when the wakeup thread is no longer needed.
@@ -567,7 +531,6 @@ wakeup_wait()
 		    "%s: wakeup thread exited with error\n", progname);
 		return (1);
 	}
-#endif
 	return (0);
 }
 
@@ -586,11 +549,7 @@ run_wthread(arg)
 
 	id = (u_long)arg;
 	quitcheck = 0;
-#if defined(MUTEX_THREAD_TEST)
 	tid = (u_long)os_thread_self();
-#else
-	tid = 0;
-#endif
 	printf("Wakeup: ID %03lu (PID: %lu; TID: %lx)\n",
 	    id, (u_long)getpid(), tid);
 
@@ -683,6 +642,12 @@ tm_env_init()
 		home = TESTDIR;
 	if (nthreads != 1)
 		flags |= DB_THREAD;
+	if (alignment != 0 &&
+	    (ret = dbenv->mutex_set_align(dbenv, alignment)) != 0) {
+		dbenv->err(dbenv, ret, "set_align(%d): %s", alignment, home);
+		return (1);
+	}
+
 	if ((ret = dbenv->open(dbenv, home, flags, 0)) != 0) {
 		dbenv->err(dbenv, ret, "environment open: %s", home);
 		return (1);
@@ -748,8 +713,10 @@ tm_mutex_init()
 	if (verbose)
 		printf("\n");
 
-	if (verbose)
+	if (verbose) {
+		(void)dbenv->mutex_stat_print(dbenv, DB_STAT_ALL);
 		printf("Allocate %d per-lock mutexes: ", maxlocks);
+	}
 	for (i = 0; i < maxlocks; ++i) {
 		mp = (TM *)(lm_addr + i * sizeof(TM));
 		if ((err = dbenv->mutex_alloc(dbenv, 0, &mp->mutex)) != 0) {
@@ -930,7 +897,7 @@ int
 usage()
 {
 	fprintf(stderr, "usage: %s %s\n\t%s\n", progname,
-	    "[-v] [-l maxlocks]",
+	    "[-a alignment] [-v] [-l maxlocks]",
 	    "[-n locks] [-p procs] [-T locker=ID|wakeup=ID] [-t threads]");
 	return (EXIT_FAILURE);
 }
diff --git a/src/mutex/uts4_cc.s b/src/mutex/uts4_cc.s
index 4f59e9c8..76eeed6c 100644
--- a/src/mutex/uts4_cc.s
+++ b/src/mutex/uts4_cc.s
@@ -1,6 +1,6 @@
  / See the file LICENSE for redistribution information.
  /
- / Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ / Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  /
  / $Id$
  /
diff --git a/src/os/os_abort.c b/src/os/os_abort.c
index 68b4bc05..72ac6751 100644
--- a/src/os/os_abort.c
+++ b/src/os/os_abort.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -13,11 +13,11 @@
 /*
  * __os_abort --
  *
- * PUBLIC: void __os_abort __P((ENV *));
+ * PUBLIC: void __os_abort __P((const ENV *));
  */
 void
 __os_abort(env)
-	ENV *env;
+	const ENV *env;
 {
 	__os_stack(env);		/* Try and get a stack trace. */
 
diff --git a/src/os/os_abs.c b/src/os/os_abs.c
index 4a1a5abd..a241c653 100644
--- a/src/os/os_abs.c
+++ b/src/os/os_abs.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_addrinfo.c b/src/os/os_addrinfo.c
index 205f41ec..aec30386 100644
--- a/src/os/os_addrinfo.c
+++ b/src/os/os_addrinfo.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_alloc.c b/src/os/os_alloc.c
index fb7bf109..478924df 100644
--- a/src/os/os_alloc.c
+++ b/src/os/os_alloc.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -11,7 +11,7 @@
 #include "db_int.h"
 
 #ifdef DIAGNOSTIC
-static void __os_guard __P((ENV *));
+static void __os_guard __P((const ENV *));
 
 typedef union {
 	size_t size;
@@ -204,11 +204,11 @@ __os_strdup(env, str, storep)
  * __os_calloc --
  *	The calloc(3) function for DB.
  *
- * PUBLIC: int __os_calloc __P((ENV *, size_t, size_t, void *));
+ * PUBLIC: int __os_calloc __P((const ENV *, size_t, size_t, void *));
  */
 int
 __os_calloc(env, num, size, storep)
-	ENV *env;
+	const ENV *env;
 	size_t num, size;
 	void *storep;
 {
@@ -227,11 +227,11 @@ __os_calloc(env, num, size, storep)
  * __os_malloc --
  *	The malloc(3) function for DB.
  *
- * PUBLIC: int __os_malloc __P((ENV *, size_t, void *));
+ * PUBLIC: int __os_malloc __P((const ENV *, size_t, void *));
  */
 int
 __os_malloc(env, size, storep)
-	ENV *env;
+	const ENV *env;
 	size_t size;
 	void *storep;
 {
@@ -261,9 +261,11 @@ __os_malloc(env, size, storep)
 		 * Windows/NT in an MT environment.
 		 */
 		if ((ret = __os_get_errno_ret_zero()) == 0) {
-			ret = ENOMEM;
+			ret = USR_ERR(env, ENOMEM);
 			__os_set_errno(ENOMEM);
 		}
+		else
+			(void)USR_ERR(env, ret);
 		__db_err(env, ret, DB_STR_A("0147", "malloc: %lu", "%lu"),
 		    (u_long)size);
 		return (ret);
@@ -292,11 +294,11 @@ __os_malloc(env, size, storep)
  * __os_realloc --
  *	The realloc(3) function for DB.
  *
- * PUBLIC: int __os_realloc __P((ENV *, size_t, void *));
+ * PUBLIC: int __os_realloc __P((const ENV *, size_t, void *));
  */
 int
 __os_realloc(env, size, storep)
-	ENV *env;
+	const ENV *env;
 	size_t size;
 	void *storep;
 {
@@ -345,7 +347,7 @@ __os_realloc(env, size, storep)
 		 * Windows/NT in an MT environment.
 		 */
 		if ((ret = __os_get_errno_ret_zero()) == 0) {
-			ret = ENOMEM;
+			ret = USR_ERR(env, ENOMEM);
 			__os_set_errno(ENOMEM);
 		}
 		__db_err(env, ret, DB_STR_A("0148", "realloc: %lu", "%lu"),
@@ -368,11 +370,11 @@ __os_realloc(env, size, storep)
  * __os_free --
  *	The free(3) function for DB.
  *
- * PUBLIC: void __os_free __P((ENV *, void *));
+ * PUBLIC: void __os_free __P((const ENV *, void *));
  */
 void
 __os_free(env, ptr)
-	ENV *env;
+	const ENV *env;
 	void *ptr;
 {
 #ifdef DIAGNOSTIC
@@ -416,7 +418,7 @@ __os_free(env, ptr)
  */
 static void
 __os_guard(env)
-	ENV *env;
+	const ENV *env;
 {
 	__db_errx(env, DB_STR("0149",
 	    "Guard byte incorrect during free"));
diff --git a/src/os/os_clock.c b/src/os/os_clock.c
index 25eeb704..78f1c8df 100644
--- a/src/os/os_clock.c
+++ b/src/os/os_clock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -14,11 +14,15 @@
  * __os_gettime --
  *	Return the current time-of-day clock in seconds and nanoseconds.
  *
- * PUBLIC: void __os_gettime __P((ENV *, db_timespec *, int));
+ * If you want the time of day, pass 0 in the monotonic argument.  If you pass
+ * non-zero, you might get time-of-day or you might get a non-decreasing number
+ * which is unrelated to the time of day, such as the seconds since system boot.
+ *
+ * PUBLIC: void __os_gettime __P((const ENV *, db_timespec *, int));
  */
 void
 __os_gettime(env, tp, monotonic)
-	ENV *env;
+	const ENV *env;
 	db_timespec *tp;
 	int monotonic;
 {
@@ -35,7 +39,6 @@ __os_gettime(env, tp, monotonic)
 		RETRY_CHK((clock_gettime(
 		    CLOCK_REALTIME, (struct timespec *)tp)), ret);
 
-	RETRY_CHK((clock_gettime(CLOCK_REALTIME, (struct timespec *)tp)), ret);
 	if (ret != 0) {
 		sc = "clock_gettime";
 		goto err;
@@ -69,5 +72,5 @@ __os_gettime(env, tp, monotonic)
 	return;
 
 err:	__db_syserr(env, ret, "%s", sc);
-	(void)__env_panic(env, __os_posix_err(ret));
+	(void)__env_panic((ENV *) env, __os_posix_err(ret));
 }
diff --git a/src/os/os_config.c b/src/os/os_config.c
index c455a349..3fe2f045 100644
--- a/src/os/os_config.c
+++ b/src/os/os_config.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_cpu.c b/src/os/os_cpu.c
index 6b7f9f1e..53cadecb 100644
--- a/src/os/os_cpu.c
+++ b/src/os/os_cpu.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_ctime.c b/src/os/os_ctime.c
index 3f656c32..82925cc1 100644
--- a/src/os/os_ctime.c
+++ b/src/os/os_ctime.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -32,10 +32,7 @@ __os_ctime(tod, time_buf)
 	 * int.
 	 */
 #if defined(HAVE_VXWORKS)
-	{
-	size_t buflen = CTIME_BUFLEN;
-	(void)ctime_r(tod, time_buf, &buflen);
-	}
+	(void)ctime_r(tod, time_buf);
 #elif defined(HAVE_CTIME_R_3ARG)
 	(void)ctime_r(tod, time_buf, CTIME_BUFLEN);
 #elif defined(HAVE_CTIME_R)
diff --git a/src/os/os_dir.c b/src/os/os_dir.c
index 42bad194..7bd91bff 100644
--- a/src/os/os_dir.c
+++ b/src/os/os_dir.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_errno.c b/src/os/os_errno.c
index a8219f90..9bc15513 100644
--- a/src/os/os_errno.c
+++ b/src/os/os_errno.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_fid.c b/src/os/os_fid.c
index f2d80e25..43c61202 100644
--- a/src/os/os_fid.c
+++ b/src/os/os_fid.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_flock.c b/src/os/os_flock.c
index 904d5efe..8f58f244 100644
--- a/src/os/os_flock.c
+++ b/src/os/os_flock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_fsync.c b/src/os/os_fsync.c
index 4b757b2c..377d7ff3 100644
--- a/src/os/os_fsync.c
+++ b/src/os/os_fsync.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_getenv.c b/src/os/os_getenv.c
index 05972112..b7c4e990 100644
--- a/src/os/os_getenv.c
+++ b/src/os/os_getenv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_handle.c b/src/os/os_handle.c
index 8ae9dc7f..7dbe31e1 100644
--- a/src/os/os_handle.c
+++ b/src/os/os_handle.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -90,7 +90,7 @@ __os_openhandle(env, name, flags, mode, fhpp)
 					 * return EEXISTS.
 					 */
 					DB_END_SINGLE_THREAD;
-					ret = EEXIST;
+					ret = USR_ERR(env, EEXIST);
 					goto err;
 				}
 				/*
@@ -127,7 +127,10 @@ __os_openhandle(env, name, flags, mode, fhpp)
 			break;
 		}
 
-		switch (ret = __os_posix_err(__os_get_syserr())) {
+		ret = __os_posix_err(__os_get_syserr());
+		if (ret != ENOENT)
+			(void)USR_ERR(env, ret);
+		switch (ret) {
 		case EMFILE:
 		case ENFILE:
 		case ENOSPC:
@@ -160,9 +163,8 @@ __os_openhandle(env, name, flags, mode, fhpp)
 		/* Deny file descriptor access to any child process. */
 		if ((fcntl_flags = fcntl(fhp->fd, F_GETFD)) == -1 ||
 		    fcntl(fhp->fd, F_SETFD, fcntl_flags | FD_CLOEXEC) == -1) {
-			ret = __os_get_syserr();
-			__db_syserr(env, ret, DB_STR("0162",
-			    "fcntl(F_SETFD)"));
+			ret = USR_ERR(env, __os_get_syserr());
+			__db_syserr(env, ret, DB_STR("0162", "fcntl(F_SETFD)"));
 			ret = __os_posix_err(ret);
 			goto err;
 		}
@@ -226,6 +228,7 @@ __os_closehandle(env, fhp)
 		else
 			RETRY_CHK((close(fhp->fd)), ret);
 		if (ret != 0) {
+			ret = USR_ERR(env, ret);
 			__db_syserr(env, ret, DB_STR("0164", "close"));
 			ret = __os_posix_err(ret);
 		}
diff --git a/src/os/os_map.c b/src/os/os_map.c
index 0528f473..b17bf107 100644
--- a/src/os/os_map.c
+++ b/src/os/os_map.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -213,6 +213,15 @@ __os_attach(env, infop, rp)
 	if (rp->max < rp->size)
 		rp->max = rp->size;
 	if (ret == 0 && F_ISSET(infop, REGION_CREATE)) {
+#ifdef HAVE_MLOCK
+		/*
+		 * When locking the region in memory extend it fully so that it
+		 * can all be mlock()'d now, and not later when paging could
+		 * interfere with the application. [#21379]
+		 */
+		if (F_ISSET(env, ENV_LOCKDOWN))
+			rp->size = rp->max;
+#endif
 		if (F_ISSET(dbenv, DB_ENV_REGION_INIT))
 			ret = __db_file_write(env, infop->fhp,
 			    rp->size / MEGABYTE, rp->size % MEGABYTE, 0x00);
@@ -255,7 +264,7 @@ __os_detach(env, infop, destroy)
 {
 	DB_ENV *dbenv;
 	REGION *rp;
-	int ret;
+	int ret, t_ret;
 
 	/*
 	 * We pass a DB_ENV handle to the user's replacement unmap function,
@@ -263,8 +272,16 @@ __os_detach(env, infop, destroy)
 	 */
 	DB_ASSERT(env, env != NULL && env->dbenv != NULL);
 	dbenv = env->dbenv;
+	ret = 0;
 
+	/*
+	 * Don't use a region which is no longer valid, e.g., after the
+	 * env has been removed.
+	 */
 	rp = infop->rp;
+	if ((rp->id != 0 && rp->id != infop->id) ||
+	    rp->type <= INVALID_REGION_TYPE || rp->type > REGION_TYPE_MAX)
+		return (EINVAL);
 
 	/* If the user replaced the unmap call, call through their interface. */
 	if (DB_GLOBAL(j_region_unmap) != NULL)
@@ -314,16 +331,26 @@ __os_detach(env, infop, destroy)
 			return (ret);
 	}
 
+	if (F_ISSET(env, ENV_FORCESYNCENV))
+		if (msync(infop->addr, rp->max, MS_INVALIDATE | MS_SYNC) != 0) {
+			t_ret = __os_get_syserr();
+			__db_syserr(env, t_ret, DB_STR("0248",
+			    "msync failed on closing environment"));
+			if (ret == 0)
+				ret = t_ret;
+		}
+
 	if (munmap(infop->addr, rp->max) != 0) {
-		ret = __os_get_syserr();
-		__db_syserr(env, ret, DB_STR("0123", "munmap"));
-		return (__os_posix_err(ret));
+		t_ret = __os_get_syserr();
+		__db_syserr(env, t_ret, DB_STR("0123", "munmap"));
+		if (ret == 0)
+			ret = t_ret;
 	}
 
-	if (destroy && (ret = __os_unlink(env, infop->name, 1)) != 0)
-		return (ret);
+	if (destroy && (t_ret = __os_unlink(env, infop->name, 1)) != 0 && ret == 0)
+		ret = t_ret;
 
-	return (0);
+	return (ret);
 #else
 	COMPQUIET(destroy, 0);
 	COMPQUIET(ret, 0);
diff --git a/src/os/os_mkdir.c b/src/os/os_mkdir.c
index 800d445c..b3034e30 100644
--- a/src/os/os_mkdir.c
+++ b/src/os/os_mkdir.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_open.c b/src/os/os_open.c
index 5090c8e1..0c58848e 100644
--- a/src/os/os_open.c
+++ b/src/os/os_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_path.c b/src/os/os_path.c
index 478fdf45..b712b31a 100644
--- a/src/os/os_path.c
+++ b/src/os/os_path.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_pid.c b/src/os/os_pid.c
index b1b94d60..9efe4633 100644
--- a/src/os/os_pid.c
+++ b/src/os/os_pid.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -40,7 +40,7 @@ __os_id(dbenv, pidp, tidp)
 			*pidp = dbenv->env->pid_cache;
 	}
 
-/* 
+/*
  * When building on MinGW, we define both HAVE_PTHREAD_SELF and DB_WIN32,
  * and we are using pthreads instead of Windows threads implementation.
  * So here, we need to check the thread implementations before checking
diff --git a/src/os/os_rename.c b/src/os/os_rename.c
index 63aac7bb..1a3d7cbd 100644
--- a/src/os/os_rename.c
+++ b/src/os/os_rename.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_rmdir.c b/src/os/os_rmdir.c
new file mode 100644
index 00000000..ab3a1556
--- /dev/null
+++ b/src/os/os_rmdir.c
@@ -0,0 +1,38 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __os_rmdir --
+ *	Remove a directory.
+ *
+ * PUBLIC: int __os_rmdir __P((ENV *, const char *));
+ */
+int
+__os_rmdir(env, name)
+	ENV *env;
+	const char *name;
+{
+	DB_ENV *dbenv;
+	int ret;
+
+	dbenv = env == NULL ? NULL : env->dbenv;
+	if (dbenv != NULL &&
+	    FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL))
+		__db_msg(env, DB_STR_A("0239", "fileops: rmdir %s",
+		    "%s"), name);
+
+	RETRY_CHK((rmdir(name)), ret);
+	if (ret != 0)
+		return (__os_posix_err(ret));
+
+	return (ret);
+}
diff --git a/src/os/os_root.c b/src/os/os_root.c
index 77e7a72c..6634a4a2 100644
--- a/src/os/os_root.c
+++ b/src/os/os_root.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_rpath.c b/src/os/os_rpath.c
index 16f3e54c..48c59b3d 100644
--- a/src/os/os_rpath.c
+++ b/src/os/os_rpath.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_rw.c b/src/os/os_rw.c
index c0967514..cc665ee4 100644
--- a/src/os/os_rw.c
+++ b/src/os/os_rw.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_seek.c b/src/os/os_seek.c
index 4676d33a..95408f3d 100644
--- a/src/os/os_seek.c
+++ b/src/os/os_seek.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_stack.c b/src/os/os_stack.c
index 037080f3..9844930f 100644
--- a/src/os/os_stack.c
+++ b/src/os/os_stack.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -14,32 +14,143 @@
 #include <execinfo.h>
 #endif
 
+#undef __DB_STACK_MAXFRAMES
+#define	__DB_STACK_MAXFRAMES	25
+
 /*
  * __os_stack --
- *	Output a stack trace to the message file handle.
+ *	Output a stack trace in a single write to the error file handle.
  *
- * PUBLIC: void __os_stack __P((ENV *));
+ * PUBLIC: void __os_stack __P((const ENV *));
  */
 void
 __os_stack(env)
-	ENV *env;
+	const ENV *env;
+{
+	/* Hide __os_stack() and __os_stack_top(): skip 2, capture 2 fewer. */
+	__os_stack_top(env, __DB_STACK_MAXFRAMES - 2, 2);
+}
+
+/*
+ * __os_stack_top --
+ *	Output just a certain range of stack frames to the error file handle.
+ *
+ * PUBLIC: void __os_stack_top __P((const ENV *, unsigned, unsigned));
+ */
+void
+__os_stack_top(env, nframes, skipframes)
+	const ENV *env;
+	unsigned nframes, skipframes;
 {
 #if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS)
-	void *array[200];
-	size_t i, size;
-	char **strings;
+	/* Start empty: the %s below must be safe if no frame is formatted. */
+	char buf[__DB_STACK_MAXFRAMES * 80] = "";  /* Allow 80 chars/line. */
 
+	__os_stack_text(env, buf, sizeof(buf), nframes, skipframes + 1);
+	__db_errx(env, "Top of stack:\n%s", buf);
+#else
+	COMPQUIET(env, NULL);
+	COMPQUIET(nframes, 0);
+	COMPQUIET(skipframes, 0);
+#endif
+}
+
+/*
+ * __os_stack_text --
+ *	Format the current stack into a caller-supplied text buffer.
+ *
+ * PUBLIC: void __os_stack_text
+ * PUBLIC:     __P((const ENV *, char *, size_t, unsigned, unsigned));
+ */
+void
+__os_stack_text(env, result, bufsize, nframes, skip)
+	const ENV *env;
+	char *result;
+	size_t bufsize;
+	unsigned nframes, skip;
+{
+	DB_MSGBUF msgbuf;
+
+	/* Wrap the caller's buffer; a NULL stack makes msgadd capture one. */
+	DB_MSGBUF_INIT(&msgbuf);
+	msgbuf.len = bufsize;
+	msgbuf.cur = msgbuf.buf = result;
+	F_SET(&msgbuf, DB_MSGBUF_PREALLOCATED);
+	__os_stack_msgadd(env, &msgbuf, nframes, skip, NULL);
+}
+
+/*
+ * __os_stack_save --
+ *	Save up to nframes current stack frames in frames[]; return the count.
+ *
+ * PUBLIC: int __os_stack_save __P((const ENV *, unsigned, void **));
+ */
+int
+__os_stack_save(env, nframes, frames)
+	const ENV *env;
+	unsigned nframes;
+	void **frames;
+{
+	COMPQUIET(env, NULL);		/* env is not used on either path. */
+#if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS)
 	/*
 	 * Solaris and the GNU C library support this interface.  Solaris
 	 * has additional interfaces (printstack and walkcontext), I don't
 	 * know if they offer any additional value or not.
 	 */
-	size = backtrace(array, sizeof(array) / sizeof(array[0]));
-	strings = backtrace_symbols(array, size);
+	return ((int) backtrace(frames, nframes));
+#else
+	COMPQUIET(nframes, 0);
+	COMPQUIET(frames, NULL);
+	return (0);			/* No backtrace(): nothing is saved. */
+#endif
+}
+
+/*
+ * __os_stack_msgadd --
+ *	Decode a stack and add it to a DB_MSGBUF.  The stack is either one
+ *	obtained earlier, e.g., by __os_stack_save(), or, if stack is NULL,
+ *	the current stack, which is fetched here.
+ *
+ * PUBLIC: void __os_stack_msgadd
+ * PUBLIC:       __P((const ENV *, DB_MSGBUF *, unsigned, unsigned, void **));
+ */
+void
+__os_stack_msgadd(env, mb, totalframes, skipframes, stack)
+	const ENV *env;
+	DB_MSGBUF *mb;
+	unsigned totalframes;
+	unsigned skipframes;
+	void **stack;
+{
+#if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS)
+	char **strings;
+	void *local_frames[__DB_STACK_MAXFRAMES];
+	unsigned i;
+
+	if (stack == NULL) {
+		stack = local_frames;
+		if (totalframes > __DB_STACK_MAXFRAMES)
+			totalframes = __DB_STACK_MAXFRAMES;
+		totalframes = backtrace(local_frames, totalframes);
+		skipframes++;		/* Hide this function's own frame. */
+	}
+
+	/*
+	 * Solaris and the GNU C library support this interface (Solaris also
+	 * has printstack and walkcontext, of unknown additional value).  The
+	 * result is malloc'd and NULL on failure, so the loop checks for it.
+	 */
+	strings = backtrace_symbols(stack, totalframes);
 
-	for (i = 0; i < size; ++i)
-		__db_errx(env, "%s", strings[i]);
+	for (i = skipframes; strings != NULL && i < totalframes; ++i)
+		__db_msgadd((ENV *)env, mb, "\t%s\n", strings[i]);
 	free(strings);
-#endif
+#else
 	COMPQUIET(env, NULL);
+	COMPQUIET(mb, NULL);
+	COMPQUIET(totalframes, 0);
+	COMPQUIET(skipframes, 0);
+	COMPQUIET(stack, NULL);
+#endif
 }
diff --git a/src/os/os_stat.c b/src/os/os_stat.c
index 43c66075..493531b7 100644
--- a/src/os/os_stat.c
+++ b/src/os/os_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_tmpdir.c b/src/os/os_tmpdir.c
index 06d35ba9..f41383d7 100644
--- a/src/os/os_tmpdir.c
+++ b/src/os/os_tmpdir.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_truncate.c b/src/os/os_truncate.c
index f559e9cb..473db9cc 100644
--- a/src/os/os_truncate.c
+++ b/src/os/os_truncate.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -14,14 +14,16 @@
  * __os_truncate --
  *	Truncate the file.
  *
- * PUBLIC: int __os_truncate __P((ENV *, DB_FH *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __os_truncate
+ * PUBLIC:	__P((ENV *, DB_FH *, db_pgno_t, u_int32_t, off_t));
  */
 int
-__os_truncate(env, fhp, pgno, pgsize)
+__os_truncate(env, fhp, pgno, pgsize, relative)
 	ENV *env;
 	DB_FH *fhp;
 	db_pgno_t pgno;
 	u_int32_t pgsize;
+	off_t relative;
 {
 	DB_ENV *dbenv;
 	off_t offset;
@@ -33,7 +35,7 @@ __os_truncate(env, fhp, pgno, pgsize)
 	 * Truncate a file so that "pgno" is discarded from the end of the
 	 * file.
 	 */
-	offset = (off_t)pgsize * pgno;
+	offset = (off_t)pgsize * pgno + relative;
 
 	if (dbenv != NULL &&
 	    FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL))
diff --git a/src/os/os_uid.c b/src/os/os_uid.c
index 2e5c9f87..c3bccb3d 100644
--- a/src/os/os_uid.c
+++ b/src/os/os_uid.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -26,8 +26,6 @@ __os_unique_id(env, idp)
 	pid_t pid;
 	u_int32_t id;
 
-	*idp = 0;
-
 	dbenv = env == NULL ? NULL : env->dbenv;
 
 	/*
@@ -35,21 +33,60 @@ __os_unique_id(env, idp)
 	 * time of day and a stack address, all XOR'd together.
 	 */
 	__os_id(dbenv, &pid, NULL);
-	__os_gettime(env, &v, 1);
+	__os_gettime(env, &v, 0);
 
 	id = (u_int32_t)pid ^
 	    (u_int32_t)v.tv_sec ^ (u_int32_t)v.tv_nsec ^ P_TO_UINT32(&pid);
 
-	/*
-	 * We could try and find a reasonable random-number generator, but
-	 * that's not all that easy to do.  Seed and use srand()/rand(), if
-	 * we can find them.
-	 */
-	if (DB_GLOBAL(uid_init) == 0) {
-		DB_GLOBAL(uid_init) = 1;
-		srand((u_int)id);
-	}
-	id ^= (u_int)rand();
+	if (DB_GLOBAL(random_seeded) == 0)
+		__os_srandom(id);
+	id ^= __os_random();
 
 	*idp = id;
 }
+
+/*
+ * __os_srandom --
+ *	Seed BDB's random number generator and mark it as seeded.
+ *
+ * PUBLIC: void __os_srandom __P((u_int));
+ */
+void
+__os_srandom(seed)
+	u_int seed;
+{
+	DB_GLOBAL(random_seeded) = 1;	/* __os_random() won't reseed. */
+#ifdef HAVE_RANDOM_R
+	(void)initstate_r(seed, &DB_GLOBAL(random_state),
+	    sizeof(DB_GLOBAL(random_state)), &DB_GLOBAL(random_data));
+	(void)srandom_r(seed, &DB_GLOBAL(random_data));
+#elif defined(HAVE_RANDOM)
+	srandom(seed);
+#else
+	srand(seed);			/* Last resort: the C library rand(). */
+#endif
+}
+
+/*
+ * __os_random --
+ *	Return the next value from BDB's random number generator.
+ *
+ * PUBLIC: u_int __os_random __P((void));
+ */
+u_int
+__os_random()
+{
+#ifdef HAVE_RANDOM_R
+	int32_t result;
+#endif
+	if (DB_GLOBAL(random_seeded) == 0)	/* Not yet seeded. */
+		__os_srandom((u_int)time(NULL));	/* Seed from the clock. */
+#ifdef HAVE_RANDOM_R
+	random_r(&DB_GLOBAL(random_data), &result);
+	return ((u_int)result);
+#elif defined(HAVE_RANDOM)
+	return ((u_int)random());
+#else
+	return ((u_int)rand());
+#endif
+}
diff --git a/src/os/os_unlink.c b/src/os/os_unlink.c
index f9a0b688..9b6d26fa 100644
--- a/src/os/os_unlink.c
+++ b/src/os/os_unlink.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os/os_yield.c b/src/os/os_yield.c
index f0e170f0..ff54921e 100644
--- a/src/os/os_yield.c
+++ b/src/os/os_yield.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_qnx/os_qnx_fsync.c b/src/os_qnx/os_qnx_fsync.c
index 827fa446..6ea04b00 100644
--- a/src/os_qnx/os_qnx_fsync.c
+++ b/src/os_qnx/os_qnx_fsync.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_qnx/os_qnx_open.c b/src/os_qnx/os_qnx_open.c
index d0214a0d..cf2f781e 100644
--- a/src/os_qnx/os_qnx_open.c
+++ b/src/os_qnx/os_qnx_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_vxworks/os_vx_abs.c b/src/os_vxworks/os_vx_abs.c
index 69413ee5..78342fce 100644
--- a/src/os_vxworks/os_vx_abs.c
+++ b/src/os_vxworks/os_vx_abs.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_vxworks/os_vx_config.c b/src/os_vxworks/os_vx_config.c
index 649a3b4a..7c7fa4c8 100644
--- a/src/os_vxworks/os_vx_config.c
+++ b/src/os_vxworks/os_vx_config.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_vxworks/os_vx_map.c b/src/os_vxworks/os_vx_map.c
index 517cadae..859bde6c 100644
--- a/src/os_vxworks/os_vx_map.c
+++ b/src/os_vxworks/os_vx_map.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * This code is derived from software contributed to Sleepycat Software by
  * Frederick G.M. Roeber of Netscape Communications Corp.
diff --git a/src/os_vxworks/os_vx_rpath.c b/src/os_vxworks/os_vx_rpath.c
index 1ffd3549..d7202c78 100644
--- a/src/os_vxworks/os_vx_rpath.c
+++ b/src/os_vxworks/os_vx_rpath.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_vxworks/os_vx_yield.c b/src/os_vxworks/os_vx_yield.c
index c7c54cf2..e3741c3f 100644
--- a/src/os_vxworks/os_vx_yield.c
+++ b/src/os_vxworks/os_vx_yield.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/ce_ctime.c b/src/os_windows/ce_ctime.c
index e8ae76aa..d4e6a4fc 100644
--- a/src/os_windows/ce_ctime.c
+++ b/src/os_windows/ce_ctime.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -25,8 +25,8 @@ __os_ctime(tod, time_buf)
 	__int64 i64_tod;
 	struct _FILETIME file_tod, file_loc;
 	struct _SYSTEMTIME sys_loc;
-static const __int64 SECS_BETWEEN_EPOCHS = 11644473600;
-static const __int64 SECS_TO_100NS = 10000000; /* 10^7 */
+	static const __int64 SECS_BETWEEN_EPOCHS = 11644473600;
+	static const __int64 SECS_TO_100NS = 10000000; /* 10^7 */
 
 	strcpy(time_buf, "Thu Jan 01 00:00:00 1970");
 	time_buf[CTIME_BUFLEN - 1] = '\0';
diff --git a/src/os_windows/ce_freopen.c b/src/os_windows/ce_freopen.c
new file mode 100644
index 00000000..331450d0
--- /dev/null
+++ b/src/os_windows/ce_freopen.c
@@ -0,0 +1,52 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __ce_freopen --
+ *	Reopen a stream on WinCE via _wfreopen(); returns NULL on failure.
+ *
+ * PUBLIC: #ifdef DB_WINCE
+ * PUBLIC: FILE * __ce_freopen
+ * PUBLIC:     __P((const char *, const char *, FILE *));
+ * PUBLIC: #endif
+ */
+FILE *
+__ce_freopen(path, mode, stream)
+	const char *path, *mode;
+	FILE *stream;
+{
+	size_t lenm, lenp;
+	wchar_t *wpath, *wmode;
+	FILE *handle;
+
+	wpath = wmode = NULL;
+	handle = NULL;
+	/* Byte lengths incl. NUL; a wide string never has more elements. */
+	lenp = strlen(path) + 1;
+	lenm = strlen(mode) + 1;
+	if (__os_malloc(NULL, lenp * sizeof(wchar_t), &wpath) != 0 ||
+	    __os_malloc(NULL, lenm * sizeof(wchar_t), &wmode) != 0)
+		goto err;
+
+	/* Yields the length without the NUL, or (size_t)-1 if invalid. */
+	if (mbstowcs(wpath, path, lenp) == (size_t)-1 ||
+	    mbstowcs(wmode, mode, lenm) == (size_t)-1)
+		goto err;
+
+	handle = _wfreopen(wpath, wmode, stream);
+err:
+	if (wpath != NULL)
+		__os_free(NULL, wpath);
+	if (wmode != NULL)
+		__os_free(NULL, wmode);
+	return handle;
+}
diff --git a/src/os_windows/ce_gmtime.c b/src/os_windows/ce_gmtime.c
new file mode 100644
index 00000000..55605c89
--- /dev/null
+++ b/src/os_windows/ce_gmtime.c
@@ -0,0 +1,58 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2012, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __ce_gmtime --
+ *	gmtime for WinCE; the result is a static buffer reused by every call.
+ *
+ * PUBLIC: #ifdef DB_WINCE
+ * PUBLIC: struct tm * __ce_gmtime __P((const time_t *));
+ * PUBLIC: #endif
+ */
+
+struct tm *
+__ce_gmtime(timer)
+	const time_t *timer;
+{
+	static struct tm br_time;
+	struct tm *timep;
+	time_t ti;
+	unsigned long dayclock, dayno;
+	int year;
+
+	timep = &br_time;
+	ti = *timer;
+	dayclock = (unsigned long)ti % SECSPERDAY;	/* secs into the day */
+	dayno = (unsigned long)ti / SECSPERDAY;	/* whole days since day 0 */
+	year = TM_YEAR_EPOCH;
+
+	timep->tm_sec = dayclock % 60;
+	timep->tm_min = (dayclock % 3600) / 60;
+	timep->tm_hour = dayclock / 3600;
+	/* day 0 was a thursday */
+	timep->tm_wday = (dayno + 4) % 7;
+	while (dayno >= year_lengths[isleap(year)]) {	/* strip whole years */
+		dayno -= year_lengths[isleap(year)];
+		year++;
+	}
+	timep->tm_year = year - TM_YEAR_BASE;
+	timep->tm_yday = dayno;			/* 0-based day of the year */
+	timep->tm_mon = 0;
+	while (dayno >= mon_lengths[isleap(year)][timep->tm_mon]) {
+		dayno -= mon_lengths[isleap(year)][timep->tm_mon];
+		timep->tm_mon++;		/* strip whole months */
+	}
+	timep->tm_mday = dayno + 1;		/* tm_mday is 1-based */
+	timep->tm_isdst = 0;
+
+	return timep;
+}
diff --git a/src/os_windows/ce_localtime.c b/src/os_windows/ce_localtime.c
new file mode 100644
index 00000000..23c53bed
--- /dev/null
+++ b/src/os_windows/ce_localtime.c
@@ -0,0 +1,44 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2012, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * localtime --
+ *	localtime for WinCE; static result, tm_yday/tm_isdst never assigned.
+ *
+ * PUBLIC: #ifdef DB_WINCE
+ * PUBLIC: struct tm * localtime __P((const time_t *));
+ * PUBLIC: #endif
+ */
+struct tm *
+localtime(t)
+	const time_t *t;
+{
+	static struct tm y;
+	FILETIME uTm, lTm;
+	SYSTEMTIME pTm;
+	int64_t t64;
+
+	t64 = *t;
+	t64 = (t64 + 11644473600)*10000000;	/* to FILETIME 100ns units */
+	uTm.dwLowDateTime = (DWORD)(t64 & 0xFFFFFFFF);
+	uTm.dwHighDateTime= (DWORD)(t64 >> 32);
+	FileTimeToLocalFileTime(&uTm,&lTm);
+	FileTimeToSystemTime(&lTm,&pTm);
+	y.tm_year = pTm.wYear - 1900;
+	y.tm_mon = pTm.wMonth - 1;
+	y.tm_wday = pTm.wDayOfWeek;
+	y.tm_mday = pTm.wDay;
+	y.tm_hour = pTm.wHour;
+	y.tm_min = pTm.wMinute;
+	y.tm_sec = pTm.wSecond;
+	return &y;
+}
diff --git a/src/os_windows/ce_mktime.c b/src/os_windows/ce_mktime.c
new file mode 100644
index 00000000..0d3a0906
--- /dev/null
+++ b/src/os_windows/ce_mktime.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 1987, 1989 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Arthur David Olson of the National Cancer Institute.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.  */
+
+/*static char *sccsid = "from: @(#)ctime.c	5.26 (Berkeley) 2/23/91";*/
+
+/*
+ * This implementation of mktime is lifted straight from the NetBSD (BSD 4.4)
+ * version.  I modified it slightly to divorce it from the internals of the
+ * ctime library.  Thus this version can't use details of the internal
+ * timezone state file to figure out strange unnormalized struct tm values,
+ * as might result from someone doing date math on the tm struct then passing
+ * it to mktime.
+ *
+ * It just does as well as it can at normalizing the tm input, then does a
+ * binary search of the time space using the system's localtime() function.
+ *
+ * The original binary search was defective in that it didn't consider the
+ * setting of tm_isdst when comparing tm values, causing the search to be
+ * flubbed for times near the dst/standard time changeover.  The original
+ * code seems to make up for this by grubbing through the timezone info
+ * whenever the binary search barfed.  Since I don't have that luxury in
+ * portable code, I have to take care of tm_isdst in the comparison routine.
+ * This requires knowing how many minutes offset dst is from standard time.
+ *
+ * So, if you live somewhere in the world where dst is not 60 minutes offset,
+ * and your vendor doesn't supply mktime(), you'll have to edit this variable
+ * by hand.  Sorry about that.
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+#undef DSTMINUTES
+#define	DSTMINUTES 60
+
+#undef FALSE
+#undef TRUE
+#define	FALSE	0
+#define	TRUE	1
+
+/*
+** Adapted from code provided by Robert Elz, who writes:
+**	The "best" way to do mktime I think is based on an idea of Bob
+**	Kridle's (so its said...) from a long time ago. (mtxinu!kridle now).
+**	It does a binary search of the time_t space.  Since time_t's are
+**	just 32 bits, its a max of 32 iterations (even at 64 bits it
+**	would still be very reasonable).
+*/
+
+#undef WRONG
+#define	WRONG	(-1)
+
+const unsigned int mon_lengths[2][MONSPERYEAR] = {
+	{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, /* common year */
+	{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }  /* leap year */
+};
+const unsigned int year_lengths[2] = {
+	DAYSPERNYEAR, DAYSPERLYEAR	/* both tables: index is isleap(year) */
+};
+
+/*
+ * normalize -- carry into *tensptr so that 0 <= *unitsptr < base (base > 0).
+ */
+static void
+normalize(tensptr, unitsptr, base)
+	int base, *tensptr, *unitsptr;
+{
+	if (*unitsptr >= base) {
+		*tensptr += *unitsptr / base;
+		*unitsptr %= base;
+	} else if (*unitsptr < 0) {
+		/* Borrow ceil(-units / base) tens; -(units + 1) can't overflow. */
+		*tensptr -= -(*unitsptr + 1) / base + 1;
+		*unitsptr = base - 1 - (-(*unitsptr + 1) % base);
+	}
+}
+
+static struct tm *
+mkdst(tmp)
+	struct tm * tmp;
+{
+	/* jds: scratch copy handed back to the caller; reused by each call. */
+	static struct tm tmbuf;
+
+	tmbuf = *tmp;
+	tmbuf.tm_isdst = 1;
+	tmbuf.tm_min += DSTMINUTES;	/* apply dst offset, then carry */
+	normalize(&tmbuf.tm_hour, &tmbuf.tm_min, MINSPERHOUR);
+	return &tmbuf;
+}
+
+static int
+tmcomp(atmp, btmp)
+	register struct tm *atmp, *btmp;
+{
+	register int result;	/* <0, 0, >0 as atmp <, ==, > btmp */
+
+	/* compare down to the same day */
+	if ((result = (atmp->tm_year - btmp->tm_year)) == 0 &&
+	    (result = (atmp->tm_mon - btmp->tm_mon)) == 0)
+		result = (atmp->tm_mday - btmp->tm_mday);
+
+	if (result != 0)
+		return result;
+
+	/* get rid of one-sided dst bias: compare against a dst copy instead */
+	if (atmp->tm_isdst == 1 && !btmp->tm_isdst)
+		btmp = mkdst(btmp);
+	else if (btmp->tm_isdst == 1 && !atmp->tm_isdst)
+		atmp = mkdst(atmp);
+
+	/* compare the rest of the way */
+	if ((result = (atmp->tm_hour - btmp->tm_hour)) == 0 &&
+	    (result = (atmp->tm_min - btmp->tm_min)) == 0)
+		result = atmp->tm_sec - btmp->tm_sec;
+
+	return result;
+}
+
+static time_t
+time2(tmp, okayp, usezn)
+	struct tm *tmp;
+	int *okayp, usezn;
+{
+	register int bits, dir, i, saved_seconds;
+	time_t t;
+	struct tm yourtm, mytm;
+
+	*okayp = FALSE;		/* set TRUE only when a match is found */
+	yourtm = *tmp;
+	if (yourtm.tm_sec >= SECSPERMIN + 2 || yourtm.tm_sec < 0)
+		normalize(&yourtm.tm_min, &yourtm.tm_sec, SECSPERMIN);
+	normalize(&yourtm.tm_hour, &yourtm.tm_min, MINSPERHOUR);
+	normalize(&yourtm.tm_mday, &yourtm.tm_hour, HOURSPERDAY);
+	normalize(&yourtm.tm_year, &yourtm.tm_mon, MONSPERYEAR);
+	while (yourtm.tm_mday <= 0) {
+		--yourtm.tm_year;
+		yourtm.tm_mday +=
+		    year_lengths[isleap(yourtm.tm_year + TM_YEAR_BASE)];
+	}
+	for ( ; ; ) {
+		i = mon_lengths[isleap(yourtm.tm_year +
+		    TM_YEAR_BASE)][yourtm.tm_mon];
+		if (yourtm.tm_mday <= i)
+			break;
+		yourtm.tm_mday -= i;
+		if (++yourtm.tm_mon >= MONSPERYEAR) {
+			yourtm.tm_mon = 0;
+			++yourtm.tm_year;
+		}
+	}
+	saved_seconds = yourtm.tm_sec;	/* added back after the search */
+	yourtm.tm_sec = 0;
+	/*
+	** Calculate the number of magnitude bits in a time_t
+	** (this works regardless of whether time_t is
+	** signed or unsigned, though lint complains if unsigned).
+	*/
+	for (bits = 0, t = 1; t > 0; ++bits, t <<= 1)
+		;
+	/*
+	** If time_t is signed, then 0 is the median value,
+	** if time_t is unsigned, then 1 << bits is median.
+	*/
+	t = (t < 0) ? 0 : ((time_t) 1 << bits);
+	for ( ; ; ) {
+		if (usezn)		/* usezn selects localtime() */
+			mytm = *localtime(&t);
+		else
+			mytm = *gmtime(&t);
+		dir = tmcomp(&mytm, &yourtm);
+		if (dir != 0) {
+			if (bits-- < 0)	/* no bits left to adjust */
+				return WRONG;
+			if (bits < 0)
+				--t;
+			else if (dir > 0)
+				t -= (time_t) 1 << bits;
+			else	t += (time_t) 1 << bits;
+			continue;
+		}
+		if (yourtm.tm_isdst < 0 || mytm.tm_isdst == yourtm.tm_isdst)
+			break;
+
+		return WRONG;	/* fields match, but tm_isdst differs */
+	}
+	t += saved_seconds;
+	if (usezn)
+		*tmp = *localtime(&t);
+	else
+		*tmp = *gmtime(&t);
+	*okayp = TRUE;
+	return t;
+}
+
+static time_t
+time1(tmp)
+	struct tm * tmp;
+{
+	register time_t t;
+	int okay;
+
+	if (tmp->tm_isdst > 1)
+		tmp->tm_isdst = 1;	/* tmcomp() tests for exactly 1 */
+	t = time2(tmp, &okay, 1);	/* 1: match against localtime() */
+	if (okay || tmp->tm_isdst < 0)
+		return t;
+
+	return WRONG;
+}
+
+/*
+ * __ce_mktime --
+ *
+ * PUBLIC: #ifdef DB_WINCE
+ * PUBLIC: time_t __ce_mktime __P((struct tm *));
+ * PUBLIC: #endif
+ */
+time_t
+__ce_mktime(tmp)
+	struct tm * tmp;
+{
+	return time1(tmp);	/* WRONG (-1) if the search fails */
+}
diff --git a/src/os_windows/ce_remove.c b/src/os_windows/ce_remove.c
new file mode 100644
index 00000000..f955f3b4
--- /dev/null
+++ b/src/os_windows/ce_remove.c
@@ -0,0 +1,26 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2012, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __ce_remove -- remove() for WinCE, implemented with __os_unlink().
+ *
+ * PUBLIC: #ifdef DB_WINCE
+ * PUBLIC: int __ce_remove __P((const char *path));
+ * PUBLIC: #endif
+ */
+
+int
+__ce_remove(path)
+	const char *path;
+{
+	return __os_unlink(NULL, path, 0);
+}
diff --git a/src/os_windows/ce_util_sig.c b/src/os_windows/ce_util_sig.c
new file mode 100644
index 00000000..11fb4ad7
--- /dev/null
+++ b/src/os_windows/ce_util_sig.c
@@ -0,0 +1,35 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2012, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * Stub functions for signal handling.
+ * WinCE does not support signal handling, so we just define stub functions to
+ * avoid link errors when building the utilities.
+ */
+
+void
+__db_util_siginit()
+{
+	return;				/* stub: no handlers are installed */
+}
+
+int
+__db_util_interrupted()
+{
+	return (0);			/* always 0: WinCE has no signals */
+}
+
+void
+__db_util_sigresend()
+{
+	return;				/* stub: nothing to resend */
+}
diff --git a/src/os_windows/os_abs.c b/src/os_windows/os_abs.c
index e769ab2c..f9be934e 100644
--- a/src/os_windows/os_abs.c
+++ b/src/os_windows/os_abs.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_clock.c b/src/os_windows/os_clock.c
index e548729b..80a96785 100644
--- a/src/os_windows/os_clock.c
+++ b/src/os_windows/os_clock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -16,7 +16,7 @@
  */
 void
 __os_gettime(env, tp, monotonic)
-	ENV *env;
+	const ENV *env;
 	db_timespec *tp;
 	int monotonic;
 {
diff --git a/src/os_windows/os_config.c b/src/os_windows/os_config.c
index 4250dbd4..c4b61700 100644
--- a/src/os_windows/os_config.c
+++ b/src/os_windows/os_config.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_cpu.c b/src/os_windows/os_cpu.c
index 0922071f..41004753 100644
--- a/src/os_windows/os_cpu.c
+++ b/src/os_windows/os_cpu.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_dir.c b/src/os_windows/os_dir.c
index 31d364d7..4065d182 100644
--- a/src/os_windows/os_dir.c
+++ b/src/os_windows/os_dir.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_errno.c b/src/os_windows/os_errno.c
index ba8ec359..a8c35480 100644
--- a/src/os_windows/os_errno.c
+++ b/src/os_windows/os_errno.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_fid.c b/src/os_windows/os_fid.c
index f2d190b1..bfd4182c 100644
--- a/src/os_windows/os_fid.c
+++ b/src/os_windows/os_fid.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -118,12 +118,12 @@ __os_fileid(env, fname, unique_okay, fidp)
 			DB_GLOBAL(fid_serial) = (u_int32_t)pid;
 		} else
 			DB_GLOBAL(fid_serial) += 100000;
-
+		tmp = (u_int32_t)DB_GLOBAL(fid_serial);
 	} else {
 		tmp = (u_int32_t)fi.dwVolumeSerialNumber;
-		for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i)
-			*fidp++ = *p++;
 	}
+	for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i)
+		*fidp++ = *p++;
 
 	return (0);
 }
diff --git a/src/os_windows/os_flock.c b/src/os_windows/os_flock.c
index cb3e4986..9dcd1e81 100644
--- a/src/os_windows/os_flock.c
+++ b/src/os_windows/os_flock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_fsync.c b/src/os_windows/os_fsync.c
index 8824aac1..5194c00b 100644
--- a/src/os_windows/os_fsync.c
+++ b/src/os_windows/os_fsync.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_getenv.c b/src/os_windows/os_getenv.c
index aad59d01..0ac1db0a 100644
--- a/src/os_windows/os_getenv.c
+++ b/src/os_windows/os_getenv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_handle.c b/src/os_windows/os_handle.c
index e6edc3ef..65809017 100644
--- a/src/os_windows/os_handle.c
+++ b/src/os_windows/os_handle.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_map.c b/src/os_windows/os_map.c
index 8f646d68..eefa3e8b 100644
--- a/src/os_windows/os_map.c
+++ b/src/os_windows/os_map.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -105,9 +105,12 @@ __os_detach(env, infop, destroy)
 	int destroy;
 {
 	DB_ENV *dbenv;
+	REGION *rp;
 	int ret, t_ret;
 
 	dbenv = env->dbenv;
+	rp = infop->rp;
+	ret = 0;
 
 	if (infop->wnt_handle != NULL) {
 		(void)CloseHandle(infop->wnt_handle);
@@ -120,10 +123,19 @@ __os_detach(env, infop, destroy)
 			return (ret);
 	}
 
-	ret = !UnmapViewOfFile(infop->addr) ? __os_get_syserr() : 0;
-	if (ret != 0) {
-		__db_syserr(env, ret, DB_STR("0007", "UnmapViewOfFile"));
-		ret = __os_posix_err(ret);
+	if (F_ISSET(env, ENV_FORCESYNCENV))
+		if (!FlushViewOfFile(infop->addr, rp->max)) {
+			ret = __os_get_syserr();
+			__db_syserr(env, ret, DB_STR("0249",
+			    "FlushViewOfFile failed on closing environment"));
+			ret = __os_posix_err(ret);
+		}
+
+	t_ret = !UnmapViewOfFile(infop->addr) ? __os_get_syserr() : 0;
+	if (t_ret != 0) {
+		__db_syserr(env, t_ret, DB_STR("0007", "UnmapViewOfFile"));
+		if (ret == 0)
+			ret = __os_posix_err(t_ret);
 	}
 
 	if (!F_ISSET(env, ENV_SYSTEM_MEM) && destroy &&
diff --git a/src/os_windows/os_mkdir.c b/src/os_windows/os_mkdir.c
index b87f3f9d..7ad7eed2 100644
--- a/src/os_windows/os_mkdir.c
+++ b/src/os_windows/os_mkdir.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_open.c b/src/os_windows/os_open.c
index 44f2faf3..bc715a96 100644
--- a/src/os_windows/os_open.c
+++ b/src/os_windows/os_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_rename.c b/src/os_windows/os_rename.c
index 791f53a5..d70f20ca 100644
--- a/src/os_windows/os_rename.c
+++ b/src/os_windows/os_rename.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_rmdir.c b/src/os_windows/os_rmdir.c
new file mode 100644
index 00000000..18090f09
--- /dev/null
+++ b/src/os_windows/os_rmdir.c
@@ -0,0 +1,42 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __os_rmdir --
+ *	Remove the directory "name"; returns 0 on success, else an error code.
+ */
+int
+__os_rmdir(env, name)
+	ENV *env;
+	const char *name;
+{
+	DB_ENV *dbenv;
+	_TCHAR *tname;
+	int ret;
+	/* env may be NULL, in which case the trace below is skipped. */
+	dbenv = env == NULL ? NULL : env->dbenv;
+	/* Log the operation when either file-operation verbose flag is set. */
+	if (dbenv != NULL &&
+	    FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL))
+		__db_msg(env, DB_STR_A("0240", "fileops: rmdir %s",
+		    "%s"), name);
+	/* Build tname from name; a nonzero ret is returned to the caller as is. */
+	TO_TSTRING(env, name, tname, ret);
+	if (ret != 0)
+		return (ret);
+	RETRY_CHK(!RemoveDirectory(tname), ret);
+	FREE_STRING(env, tname);	/* Runs whether or not the call failed. */
+	if (ret != 0)
+		return (__os_posix_err(ret));
+
+	return (ret);
+}
diff --git a/src/os_windows/os_rw.c b/src/os_windows/os_rw.c
index e64a7d08..20644e6e 100644
--- a/src/os_windows/os_rw.c
+++ b/src/os_windows/os_rw.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_seek.c b/src/os_windows/os_seek.c
index 7632c15d..613e4a7c 100644
--- a/src/os_windows/os_seek.c
+++ b/src/os_windows/os_seek.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_stat.c b/src/os_windows/os_stat.c
index 11248886..5c3a0fcc 100644
--- a/src/os_windows/os_stat.c
+++ b/src/os_windows/os_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_truncate.c b/src/os_windows/os_truncate.c
index fcbb37b2..d1150c85 100644
--- a/src/os_windows/os_truncate.c
+++ b/src/os_windows/os_truncate.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -15,11 +15,12 @@
  *	Truncate the file.
  */
 int
-__os_truncate(env, fhp, pgno, pgsize)
+__os_truncate(env, fhp, pgno, pgsize, relative)
 	ENV *env;
 	DB_FH *fhp;
 	db_pgno_t pgno;
 	u_int32_t pgsize;
+	off_t relative;
 {
 	/* Yes, this really is how Microsoft have designed their API */
 	union {
@@ -34,7 +35,7 @@ __os_truncate(env, fhp, pgno, pgsize)
 	int ret;
 
 	dbenv = env == NULL ? NULL : env->dbenv;
-	offset = (off_t)pgsize * pgno;
+	offset = (off_t)pgsize * pgno + relative;
 	ret = 0;
 
 	if (dbenv != NULL &&
@@ -84,7 +85,7 @@ __os_truncate(env, fhp, pgno, pgsize)
 	 * We can't switch to SetFilePointerEx, which knows about 64-bit
 	 * offsets, because it isn't supported on Win9x/ME.
 	 */
-	RETRY_CHK((off.bigint = (__int64)pgsize * pgno,
+	RETRY_CHK((off.bigint = (__int64)pgsize * pgno + relative,
 	    (SetFilePointer(fhp->trunc_handle, off.low, &off.high, FILE_BEGIN)
 	    == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) ||
 	    !SetEndOfFile(fhp->trunc_handle)), ret);
diff --git a/src/os_windows/os_unlink.c b/src/os_windows/os_unlink.c
index 6a0a6572..5c63a5e6 100644
--- a/src/os_windows/os_unlink.c
+++ b/src/os_windows/os_unlink.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/os_windows/os_yield.c b/src/os_windows/os_yield.c
index 0d32ef69..bf326ee2 100644
--- a/src/os_windows/os_yield.c
+++ b/src/os_windows/os_yield.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1997, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam.c b/src/qam/qam.c
index e81d4795..0c71fd0d 100644
--- a/src/qam/qam.c
+++ b/src/qam/qam.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -260,7 +260,7 @@ __qamc_put(dbc, key, data, flags, pgnop)
 	}
 
 	if (exact != 0 && flags == DB_NOOVERWRITE)
-		ret = DB_KEYEXIST;
+		ret = DBC_ERR(dbc, DB_KEYEXIST);
 	else
 		/* Put the item on the page. */
 		ret = __qam_pitem(dbc,
@@ -526,7 +526,7 @@ __qamc_del(dbc, flags)
 		return (ret);
 
 	if (QAM_NOT_VALID(meta, cp->recno)) {
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 	}
 	first = meta->first_recno;
@@ -549,7 +549,7 @@ __qamc_del(dbc, flags)
 		goto err;
 
 	if (!exact) {
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 	}
 
@@ -636,9 +636,9 @@ __qamc_get(dbc, key, data, flags, pgnop)
 	QUEUE_CURSOR *cp;
 	db_lockmode_t lock_mode;
 	db_pgno_t metapno;
-	db_recno_t first;
+	db_recno_t first, old_first;
 	int exact, inorder, is_first, ret, t_ret, wait, with_delete;
-	int retrying;
+	int retrying, stay;
 	u_int32_t skip, meta_mode;
 
 	dbp = dbc->dbp;
@@ -652,7 +652,9 @@ __qamc_get(dbc, key, data, flags, pgnop)
 	meta = NULL;
 	*pgnop = 0;
 	pg = NULL;
-	retrying = t_ret = wait = with_delete = 0;
+	retrying =  t_ret = wait = with_delete = 0;
+	stay = 1;
+	old_first = RECNO_OOB;
 
 	if (flags == DB_CONSUME_WAIT) {
 		wait = 1;
@@ -676,25 +678,25 @@ __qamc_get(dbc, key, data, flags, pgnop)
 	t = (QUEUE *)dbp->q_internal;
 	metapno = t->q_meta;
 
-	/*
-	 * Get the meta page first
-	 */
-	if ((ret = __memp_fget(mpf, &metapno,
-	     dbc->thread_info, dbc->txn, meta_mode, &meta)) != 0)
-		return (ret);
-
 	/* Release any previous lock if not in a transaction. */
 	if ((ret = __TLPUT(dbc, cp->lock)) != 0)
 		goto err;
 
 	skip = 0;
-retry:	/* Update the record number. */
+retry:
+	/*
+	 * Get the meta page first
+	 */
+	if (meta == NULL && (ret = __memp_fget(mpf, &metapno,
+	     dbc->thread_info, dbc->txn, meta_mode, &meta)) != 0)
+		return (ret);
+	/* Update the record number. */
 	switch (flags) {
 	case DB_CURRENT:
 		break;
 	case DB_NEXT_DUP:
 	case DB_PREV_DUP:
-		ret = DB_NOTFOUND;
+		ret = DBC_ERR(dbc, DB_NOTFOUND);
 		goto err;
 		/* NOTREACHED */
 	case DB_NEXT:
@@ -711,7 +713,7 @@ retry:	/* Update the record number. */
 			if (QAM_AFTER_CURRENT(meta, cp->recno)) {
 				pg = NULL;
 				if (!wait) {
-					ret = DB_NOTFOUND;
+					ret = DBC_ERR(dbc, DB_NOTFOUND);
 					goto err;
 				}
 				/*
@@ -774,6 +776,7 @@ retry:	/* Update the record number. */
 				    DB_LOCK_UPGRADE, &metalock)) != 0) {
 					if (ret == DB_LOCK_DEADLOCK)
 						ret = DB_LOCK_NOTGRANTED;
+					(void)DBC_ERR(dbc, ret);
 					goto err;
 				}
 
@@ -792,6 +795,8 @@ retry:	/* Update the record number. */
 
 		/* get the first record number */
 		cp->recno = first = meta->first_recno;
+		if (old_first == RECNO_OOB)
+			old_first = first;
 
 		break;
 	case DB_PREV:
@@ -799,7 +804,7 @@ retry:	/* Update the record number. */
 		if (cp->recno != RECNO_OOB) {
 			if (cp->recno == meta->first_recno ||
 			   QAM_BEFORE_FIRST(meta, cp->recno)) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto err;
 			}
 			QAM_DEC_RECNO(cp->recno);
@@ -808,7 +813,7 @@ retry:	/* Update the record number. */
 		/* FALLTHROUGH */
 	case DB_LAST:
 		if (meta->first_recno == meta->cur_recno) {
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err;
 		}
 		cp->recno = meta->cur_recno;
@@ -892,11 +897,11 @@ dolock:	if (!with_delete || inorder || retrying) {
 					LOCK_INIT(lock);
 					goto release_retry;
 				}
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto lerr;
 			}
 			if (QAM_AFTER_CURRENT(meta, cp->recno)) {
-				ret = DB_NOTFOUND;
+				ret = DBC_ERR(dbc, DB_NOTFOUND);
 				goto lerr;
 			}
 		}
@@ -951,9 +956,37 @@ release_retry:	/* Release locks and retry, if possible. */
 		case DB_NEXT_NODUP:
 			if (!with_delete)
 				is_first = 0;
-			else if (first == cp->recno)
+			else if (first == cp->recno) {
 				/* we have verified that this record is gone. */
 				QAM_INC_RECNO(first);
+				/* 
+				 * If we are reading in order and the first
+				 * record was not there, we need to reflect
+				 * this in the meta page, so that we can
+				 * avoid checking this record again and again.
+				 */
+				if (inorder && cp->recno == meta->first_recno) {
+					if (DBC_LOGGING(dbc)) {
+#ifdef QDEBUG
+						(void)__log_printf(
+						    dbp->env, dbc->txn,
+						    "Queue O: %x %u %u %u",
+						    dbc->locker ? 
+						    dbc->locker->id : 0,
+						    cp->recno, first, 
+						    meta->cur_recno);
+#endif
+						if ((ret = __qam_incfirst_log(
+						    dbp, dbc->txn,
+						    &meta->dbmeta.lsn, 0,
+						    cp->recno,
+						    PGNO_BASE_MD)) != 0)
+							goto err;
+					} else
+						LSN_NOT_LOGGED(meta->dbmeta.lsn);
+					meta->first_recno = first;
+				}
+			}
 			if (QAM_BEFORE_FIRST(meta, cp->recno) &&
 			    DONT_NEED_LOCKS(dbc))
 				flags = DB_FIRST;
@@ -979,7 +1012,7 @@ release_retry:	/* Release locks and retry, if possible. */
 
 		default:
 			/* this is for the SET and GET_BOTH cases */
-			ret = DB_KEYEMPTY;
+			ret = DBC_ERR(dbc, DB_KEYEMPTY);
 			goto err1;
 		}
 		retrying = 0;
@@ -1031,10 +1064,10 @@ release_retry:	/* Release locks and retry, if possible. */
 		 */
 		tmp.data = qp->data;
 		tmp.size = t->re_len;
-		if ((ret = __bam_defcmp(dbp, data, &tmp)) != 0) {
+		if ((ret = __bam_defcmp(dbp, data, &tmp, NULL)) != 0) {
 			if (flags == DB_GET_BOTH_RANGE)
 				goto release_retry;
-			ret = DB_NOTFOUND;
+			ret = DBC_ERR(dbc, DB_NOTFOUND);
 			goto err1;
 		}
 	}
@@ -1139,14 +1172,17 @@ release_retry:	/* Release locks and retry, if possible. */
 		 * If we deleted the first record we checked then we moved
 		 * the first pointer  properly.
 		 */
-
-		if (first == cp->recno && (skip = (first % t->rec_page)) != 0)
+		if (((QUEUE *)dbp->q_internal)->page_ext != 0)
+			stay = (QAM_RECNO_EXTENT(dbp, old_first) ==
+			    QAM_RECNO_EXTENT(dbp, first));
+		if (stay && first == cp->recno &&
+		    (skip = (first % t->rec_page)) != 0)
 			goto done;
 		if (meta == NULL &&
 		     (ret = __memp_fget(mpf, &metapno,
 		     dbc->thread_info, dbc->txn, 0, &meta)) != 0)
 			goto err;
-		if (skip && !QAM_BEFORE_FIRST(meta, first))
+		if (stay && skip && !QAM_BEFORE_FIRST(meta, first))
 			goto done;
 
 #ifdef QDEBUG
@@ -1156,7 +1192,11 @@ release_retry:	/* Release locks and retry, if possible. */
 			    dbc->locker ? dbc->locker->id : 0,
 			    cp->recno, first, meta->first_recno);
 #endif
-		ret = __qam_consume(dbc, meta, first);
+		if (stay) {
+			ret = __qam_consume(dbc, meta, first);
+		} else {
+			ret = __qam_consume(dbc, meta, old_first);
+		}
 	}
 
 err1:	if (cp->page != NULL) {
@@ -1272,8 +1312,8 @@ __qam_consume(dbc, meta, first)
 		 */
 		if (rec_extent != 0 &&
 		    ((exact = (first % rec_extent == 0)) ||
-		    (first % meta->rec_page == 0) ||
-		    first == UINT32_MAX)) {
+		    (exact = (first == UINT32_MAX)) ||
+		    (first % meta->rec_page == 0))) {
 #ifdef QDEBUG
 			if (DBC_LOGGING(dbc))
 				(void)__log_printf(dbp->env, dbc->txn,
diff --git a/src/qam/qam.src b/src/qam/qam.src
index a8e2e4e0..eca6c07c 100644
--- a/src/qam/qam.src
+++ b/src/qam/qam.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_conv.c b/src/qam/qam_conv.c
index beb7c973..34ce321a 100644
--- a/src/qam/qam_conv.c
+++ b/src/qam/qam_conv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_files.c b/src/qam/qam_files.c
index e9a9ff07..f5c7d2ec 100644
--- a/src/qam/qam_files.c
+++ b/src/qam/qam_files.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -445,6 +445,8 @@ __qam_fremove(dbp, pgnoaddr)
 		    * sizeof(array->mpfarray[0]));
 		array->mpfarray[
 		    array->hi_extent - array->low_extent].mpf = NULL;
+		array->mpfarray[
+		    array->hi_extent - array->low_extent].pinref = 0;
 		if (array->low_extent != array->hi_extent)
 			array->low_extent++;
 	} else {
@@ -570,8 +572,11 @@ again:
 	for (i = first; i >= first && i <= stop; i += rec_extent) {
 		if ((ret = __qam_fprobe(dbc, QAM_RECNO_PAGE(dbp, i),
 		    &fp->mpf, QAM_PROBE_MPF, dbp->priority, 0)) != 0) {
-			if (ret == ENOENT)
+			if (ret == ENOENT) {
+				/* Missing extents are acceptable; skip them. */
+				ret = 0;
 				continue;
+			}
 			goto err;
 		}
 		fp->id = QAM_RECNO_EXTENT(dbp, i);
diff --git a/src/qam/qam_method.c b/src/qam/qam_method.c
index 0867e5dd..5d796cdb 100644
--- a/src/qam/qam_method.c
+++ b/src/qam/qam_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_open.c b/src/qam/qam_open.c
index 69f6cb75..5be78f68 100644
--- a/src/qam/qam_open.c
+++ b/src/qam/qam_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_rec.c b/src/qam/qam_rec.c
index c9ff6c83..c5f6b3f4 100644
--- a/src/qam/qam_rec.c
+++ b/src/qam/qam_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -152,6 +152,10 @@ __qam_incfirst_recover(env, dbtp, lsnp, op, info)
 			REC_DIRTY(mpf, ip, dbc->priority, &meta);
 			LSN(meta) = *lsnp;
 		}
+		if (QAM_BEFORE_FIRST(meta, argp->recno)) {
+                        REC_DIRTY(mpf, ip, dbc->priority, &meta);
+                        meta->first_recno = argp->recno;
+		}
 		if ((ret = __qam_adjust_first(file_dbp,
 		    dbc, meta, argp->recno + 1)) != 0)
 			goto err;
diff --git a/src/qam/qam_stat.c b/src/qam/qam_stat.c
index 15c41bb5..19e09383 100644
--- a/src/qam/qam_stat.c
+++ b/src/qam/qam_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_stub.c b/src/qam/qam_stub.c
index f5140079..6df0536c 100644
--- a/src/qam/qam_stub.c
+++ b/src/qam/qam_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_upgrade.c b/src/qam/qam_upgrade.c
index ac96c889..4b9e9453 100644
--- a/src/qam/qam_upgrade.c
+++ b/src/qam/qam_upgrade.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/qam/qam_verify.c b/src/qam/qam_verify.c
index af5ab5db..d2f8ab79 100644
--- a/src/qam/qam_verify.c
+++ b/src/qam/qam_verify.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -115,14 +115,14 @@ __qam_vrfy_meta(dbp, vdp, meta, pgno, flags)
 	 * this assumption fails.  (We need the qp info to be reasonable
 	 * before we do per-page verification of queue extents.)
 	 */
-	if (F_ISSET(vdp, VRFY_QMETA_SET)) {
+	if (F_ISSET(vdp, SALVAGE_QMETA_SET)) {
 		isbad = 1;
 		EPRINT((env, DB_STR_A("1148",
 		    "Page %lu: database contains multiple Queue metadata pages",
 		    "%lu"), (u_long)pgno));
 		goto err;
 	}
-	F_SET(vdp, VRFY_QMETA_SET);
+	F_SET(vdp, SALVAGE_QMETA_SET);
 	qp->page_ext = meta->page_ext;
 	dbp->pgsize = meta->dbmeta.pagesize;
 	qp->q_meta = pgno;
diff --git a/src/rep/mlease.html b/src/rep/mlease.html
index 7d44b465..4e82f63c 100644
--- a/src/rep/mlease.html
+++ b/src/rep/mlease.html
@@ -1,5 +1,5 @@
 <!DOCTYPE doctype PUBLIC "-//w3c//dtd html 4.0 transitional//en">
-<!--Copyright (c) 2011, 2012 Oracle and/or its affiliates.  All rights reserved.-->
+<!--Copyright (c) 2011, 2015 Oracle and/or its affiliates.  All rights reserved.-->
 <html>
 <head>
   <meta http-equiv="Content-Type"
diff --git a/src/rep/rep.msg b/src/rep/rep.msg
index b751a64d..d5c56d93 100644
--- a/src/rep/rep.msg
+++ b/src/rep/rep.msg
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -57,7 +57,22 @@ ARG	pgsize		u_int32_t
 ARG	pgno		db_pgno_t
 ARG	max_pgno	db_pgno_t
 ARG	filenum		u_int32_t
-ARG	finfo_flags	u_int32_t		
+ARG	finfo_flags	u_int32_t
+ARG	type		u_int32_t
+ARG	db_flags	u_int32_t
+ARG	uid		DBT
+ARG	info		DBT
+ARG	dir		DBT
+ARG	blob_fid_lo	u_int32_t
+ARG	blob_fid_hi	u_int32_t
+END
+
+BEGIN_MSG fileinfo_v7	alloc check_length version
+ARG	pgsize		u_int32_t
+ARG	pgno		db_pgno_t
+ARG	max_pgno	db_pgno_t
+ARG	filenum		u_int32_t
+ARG	finfo_flags	u_int32_t
 ARG	type		u_int32_t
 ARG	db_flags	u_int32_t
 ARG	uid		DBT
@@ -158,3 +173,54 @@ ARG	lsn		DB_LSN
 ARG	hist_sec	u_int32_t
 ARG	hist_nsec	u_int32_t
 END
+
+/*
+ * Request for blob files.
+ */
+BEGIN_MSG blob_update_req
+ARG	blob_fid	u_int64_t
+ARG	blob_sid	u_int64_t
+ARG	blob_id		u_int64_t
+ARG	highest_id	u_int64_t
+END
+
+/*
+ * A list of blob files for a database.
+ */
+BEGIN_MSG blob_update
+ARG	blob_fid	u_int64_t
+ARG	highest_id	u_int64_t
+ARG	flags		u_int32_t
+ARG	num_blobs	u_int32_t
+END
+
+/*
+ * Blob file description, part of blob_update.
+ */
+BEGIN_MSG blob_file
+ARG	blob_sid	u_int64_t
+ARG	blob_id		u_int64_t
+ARG	blob_size	u_int64_t
+END
+
+/*
+ * A piece of data from a blob file.
+ */
+BEGIN_MSG blob_chunk
+ARG	flags		u_int32_t
+ARG	blob_fid	u_int64_t
+ARG	blob_sid	u_int64_t
+ARG	blob_id		u_int64_t
+ARG	offset		u_int64_t
+ARG	data		DBT
+END
+
+/*
+ * Request for data from a blob file at the given offset.
+ */
+BEGIN_MSG blob_chunk_req
+ARG	blob_fid	u_int64_t
+ARG	blob_sid	u_int64_t
+ARG	blob_id		u_int64_t
+ARG	offset		u_int64_t
+END	
diff --git a/src/rep/rep_automsg.c b/src/rep/rep_automsg.c
index 5d8155fb..cab68b3e 100644
--- a/src/rep/rep_automsg.c
+++ b/src/rep/rep_automsg.c
@@ -280,6 +280,16 @@ __rep_fileinfo_marshal(env, version, argp, bp, max, lenp)
 		memcpy(bp, argp->dir.data, argp->dir.size);
 		bp += argp->dir.size;
 	}
+	if (copy_only) {
+		memcpy(bp, &argp->blob_fid_lo, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->blob_fid_lo);
+	if (copy_only) {
+		memcpy(bp, &argp->blob_fid_hi, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->blob_fid_hi);
 
 	*lenp = (size_t)(bp - start);
 	return (0);
@@ -386,6 +396,16 @@ __rep_fileinfo_unmarshal(env, version, argpp, bp, max, nextp)
 	if (max < needed)
 		goto too_few;
 	bp += argp->dir.size;
+	if (copy_only) {
+		memcpy(&argp->blob_fid_lo, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->blob_fid_lo, bp);
+	if (copy_only) {
+		memcpy(&argp->blob_fid_hi, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->blob_fid_hi, bp);
 
 	if (nextp != NULL)
 		*nextp = bp;
@@ -399,6 +419,211 @@ too_few:
 }
 
 /*
+ * PUBLIC: int __rep_fileinfo_v7_marshal __P((ENV *, u_int32_t,
+ * PUBLIC:	 __rep_fileinfo_v7_args *, u_int8_t *, size_t, size_t *));
+ */
+int
+__rep_fileinfo_v7_marshal(env, version, argp, bp, max, lenp)
+	ENV *env;
+	u_int32_t version;
+	__rep_fileinfo_v7_args *argp;
+	u_int8_t *bp;
+	size_t *lenp, max;
+{
+	int copy_only;
+	u_int8_t *start;
+	/* The buffer must hold the fixed part plus the three DBT payloads. */
+	if (max < __REP_FILEINFO_V7_SIZE
+	    + (size_t)argp->uid.size
+	    + (size_t)argp->info.size
+	    + (size_t)argp->dir.size)
+		return (ENOMEM);
+	start = bp;
+	/* version < DB_REPVERSION_47 uses memcpy, else DB_HTONL_COPYOUT. */
+	copy_only = 0;
+	if (version < DB_REPVERSION_47)
+		copy_only = 1;
+	if (copy_only) {
+		memcpy(bp, &argp->pgsize, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->pgsize);
+	if (copy_only) {
+		memcpy(bp, &argp->pgno, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->pgno);
+	if (copy_only) {
+		memcpy(bp, &argp->max_pgno, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->max_pgno);
+	if (copy_only) {
+		memcpy(bp, &argp->filenum, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->filenum);
+	if (copy_only) {
+		memcpy(bp, &argp->finfo_flags, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->finfo_flags);
+	if (copy_only) {
+		memcpy(bp, &argp->type, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->type);
+	if (copy_only) {
+		memcpy(bp, &argp->db_flags, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->db_flags);
+	if (copy_only) {
+		memcpy(bp, &argp->uid.size, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->uid.size);
+	if (argp->uid.size > 0) {
+		memcpy(bp, argp->uid.data, argp->uid.size);
+		bp += argp->uid.size;
+	}
+	if (copy_only) {
+		memcpy(bp, &argp->info.size, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->info.size);
+	if (argp->info.size > 0) {
+		memcpy(bp, argp->info.data, argp->info.size);
+		bp += argp->info.size;
+	}
+	if (copy_only) {
+		memcpy(bp, &argp->dir.size, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_HTONL_COPYOUT(env, bp, argp->dir.size);
+	if (argp->dir.size > 0) {
+		memcpy(bp, argp->dir.data, argp->dir.size);
+		bp += argp->dir.size;
+	}
+	/* Report the number of bytes written, measured from the start of bp. */
+	*lenp = (size_t)(bp - start);
+	return (0);
+}
+
+/*
+ * PUBLIC: int __rep_fileinfo_v7_unmarshal __P((ENV *, u_int32_t,
+ * PUBLIC:	 __rep_fileinfo_v7_args **, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__rep_fileinfo_v7_unmarshal(env, version, argpp, bp, max, nextp)
+	ENV *env;
+	u_int32_t version;
+	__rep_fileinfo_v7_args **argpp;
+	u_int8_t *bp;
+	size_t max;
+	u_int8_t **nextp;
+{
+	size_t needed;
+	__rep_fileinfo_v7_args *argp;
+	int ret;
+	int copy_only;
+	/* Fixed-size part first; each DBT payload is checked when reached. */
+	needed = __REP_FILEINFO_V7_SIZE;
+	if (max < needed)
+		goto too_few;
+	if ((ret = __os_malloc(env, sizeof(*argp), &argp)) != 0)
+		return (ret);
+	/* version < DB_REPVERSION_47 uses memcpy, else DB_NTOHL_COPYIN. */
+	copy_only = 0;
+	if (version < DB_REPVERSION_47)
+		copy_only = 1;
+	if (copy_only) {
+		memcpy(&argp->pgsize, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->pgsize, bp);
+	if (copy_only) {
+		memcpy(&argp->pgno, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->pgno, bp);
+	if (copy_only) {
+		memcpy(&argp->max_pgno, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->max_pgno, bp);
+	if (copy_only) {
+		memcpy(&argp->filenum, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->filenum, bp);
+	if (copy_only) {
+		memcpy(&argp->finfo_flags, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->finfo_flags, bp);
+	if (copy_only) {
+		memcpy(&argp->type, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->type, bp);
+	if (copy_only) {
+		memcpy(&argp->db_flags, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->db_flags, bp);
+	if (copy_only) {
+		memcpy(&argp->uid.size, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->uid.size, bp);
+	if (argp->uid.size == 0)
+		argp->uid.data = NULL;
+	else
+		argp->uid.data = bp;
+	needed += (size_t)argp->uid.size;
+	if (max < needed)
+		goto too_few;
+	bp += argp->uid.size;
+	if (copy_only) {
+		memcpy(&argp->info.size, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->info.size, bp);
+	if (argp->info.size == 0)
+		argp->info.data = NULL;
+	else
+		argp->info.data = bp;
+	needed += (size_t)argp->info.size;
+	if (max < needed)
+		goto too_few;
+	bp += argp->info.size;
+	if (copy_only) {
+		memcpy(&argp->dir.size, bp, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else
+		DB_NTOHL_COPYIN(env, argp->dir.size, bp);
+	if (argp->dir.size == 0)
+		argp->dir.data = NULL;
+	else
+		argp->dir.data = bp;
+	needed += (size_t)argp->dir.size;
+	if (max < needed)
+		goto too_few;
+	bp += argp->dir.size;
+	/* The DBT data pointers alias the input buffer; nothing was copied. */
+	if (nextp != NULL)
+		*nextp = bp;
+	*argpp = argp;
+	return (0);
+	/* NOTE(review): jumps taken after the malloc do not free argp. */
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __rep_fileinfo_v7 message"));
+	return (EINVAL);
+}
+
+/*
  * PUBLIC: int __rep_fileinfo_v6_marshal __P((ENV *, u_int32_t,
  * PUBLIC:	 __rep_fileinfo_v6_args *, u_int8_t *, size_t, size_t *));
  */
@@ -1039,3 +1264,245 @@ too_few:
 	return (EINVAL);
 }
 
+/*
+ * PUBLIC: void __rep_blob_update_req_marshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_update_req_args *, u_int8_t *));
+ */
+void
+__rep_blob_update_req_marshal(env, argp, bp)
+	ENV *env;
+	__rep_blob_update_req_args *argp;	/* Fields to write out. */
+	u_int8_t *bp;	/* Destination; no length is passed or checked. */
+{
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_fid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_sid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_id);
+	DB_HTONLL_COPYOUT(env, bp, argp->highest_id);
+}
+
+/*
+ * PUBLIC: int __rep_blob_update_req_unmarshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_update_req_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__rep_blob_update_req_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__rep_blob_update_req_args *argp;	/* Caller-supplied; filled in. */
+	u_int8_t *bp;
+	size_t max;	/* Bytes available at bp. */
+	u_int8_t **nextp;	/* Optional; may be NULL. */
+{
+	if (max < __REP_BLOB_UPDATE_REQ_SIZE)
+		goto too_few;
+	DB_NTOHLL_COPYIN(env, argp->blob_fid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_sid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_id, bp);
+	DB_NTOHLL_COPYIN(env, argp->highest_id, bp);
+	/* Hand the current bp back to callers that asked for it. */
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __rep_blob_update_req message"));
+	return (EINVAL);
+}
+
+/*
+ * PUBLIC: void __rep_blob_update_marshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_update_args *, u_int8_t *));
+ */
+void
+__rep_blob_update_marshal(env, argp, bp)
+	ENV *env;
+	__rep_blob_update_args *argp;	/* Fields to write out. */
+	u_int8_t *bp;	/* Destination; no length is passed or checked. */
+{
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_fid);
+	DB_HTONLL_COPYOUT(env, bp, argp->highest_id);
+	DB_HTONL_COPYOUT(env, bp, argp->flags);
+	DB_HTONL_COPYOUT(env, bp, argp->num_blobs);
+}
+
+/*
+ * PUBLIC: int __rep_blob_update_unmarshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_update_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__rep_blob_update_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__rep_blob_update_args *argp;	/* Caller-supplied; filled in. */
+	u_int8_t *bp;
+	size_t max;	/* Bytes available at bp. */
+	u_int8_t **nextp;	/* Optional; may be NULL. */
+{
+	if (max < __REP_BLOB_UPDATE_SIZE)
+		goto too_few;
+	DB_NTOHLL_COPYIN(env, argp->blob_fid, bp);
+	DB_NTOHLL_COPYIN(env, argp->highest_id, bp);
+	DB_NTOHL_COPYIN(env, argp->flags, bp);
+	DB_NTOHL_COPYIN(env, argp->num_blobs, bp);
+	/* Hand the current bp back to callers that asked for it. */
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __rep_blob_update message"));
+	return (EINVAL);
+}
+
+/*
+ * PUBLIC: void __rep_blob_file_marshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_file_args *, u_int8_t *));
+ */
+void
+__rep_blob_file_marshal(env, argp, bp)
+	ENV *env;
+	__rep_blob_file_args *argp;	/* Fields to write out. */
+	u_int8_t *bp;	/* Destination; no length is passed or checked. */
+{
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_sid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_id);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_size);
+}
+
+/*
+ * PUBLIC: int __rep_blob_file_unmarshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_file_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__rep_blob_file_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__rep_blob_file_args *argp;	/* Caller-supplied; filled in. */
+	u_int8_t *bp;
+	size_t max;	/* Bytes available at bp. */
+	u_int8_t **nextp;	/* Optional; may be NULL. */
+{
+	if (max < __REP_BLOB_FILE_SIZE)
+		goto too_few;
+	DB_NTOHLL_COPYIN(env, argp->blob_sid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_id, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_size, bp);
+	/* Hand the current bp back to callers that asked for it. */
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __rep_blob_file message"));
+	return (EINVAL);
+}
+
+/*
+ * PUBLIC: void __rep_blob_chunk_marshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_chunk_args *, u_int8_t *));
+ */
+void
+__rep_blob_chunk_marshal(env, argp, bp)
+	ENV *env;
+	__rep_blob_chunk_args *argp;	/* Fields and payload to write out. */
+	u_int8_t *bp;	/* Destination; no length is passed or checked. */
+{
+	DB_HTONL_COPYOUT(env, bp, argp->flags);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_fid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_sid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_id);
+	DB_HTONLL_COPYOUT(env, bp, argp->offset);
+	DB_HTONL_COPYOUT(env, bp, argp->data.size);
+	if (argp->data.size > 0) {
+		memcpy(bp, argp->data.data, argp->data.size);
+		bp += argp->data.size;	/* Only the local bp moves. */
+	}
+}
+
+/*
+ * PUBLIC: int __rep_blob_chunk_unmarshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_chunk_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__rep_blob_chunk_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__rep_blob_chunk_args *argp;	/* Caller-supplied; filled in. */
+	u_int8_t *bp;
+	size_t max;	/* Bytes available at bp. */
+	u_int8_t **nextp;	/* Optional; may be NULL. */
+{
+	size_t needed;
+	/* Check the fixed part first, the payload once its size is known. */
+	needed = __REP_BLOB_CHUNK_SIZE;
+	if (max < needed)
+		goto too_few;
+	DB_NTOHL_COPYIN(env, argp->flags, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_fid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_sid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_id, bp);
+	DB_NTOHLL_COPYIN(env, argp->offset, bp);
+	DB_NTOHL_COPYIN(env, argp->data.size, bp);
+	if (argp->data.size == 0)
+		argp->data.data = NULL;
+	else
+		argp->data.data = bp;
+	needed += (size_t)argp->data.size;
+	if (max < needed)
+		goto too_few;
+	bp += argp->data.size;
+	/* data.data points into the input buffer; nothing was copied. */
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __rep_blob_chunk message"));
+	return (EINVAL);
+}
+
+/*
+ * PUBLIC: void __rep_blob_chunk_req_marshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_chunk_req_args *, u_int8_t *));
+ */
+void
+__rep_blob_chunk_req_marshal(env, argp, bp)
+	ENV *env;
+	__rep_blob_chunk_req_args *argp;	/* Fields to write out. */
+	u_int8_t *bp;	/* Destination; no length is passed or checked. */
+{
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_fid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_sid);
+	DB_HTONLL_COPYOUT(env, bp, argp->blob_id);
+	DB_HTONLL_COPYOUT(env, bp, argp->offset);
+}
+
+/*
+ * PUBLIC: int __rep_blob_chunk_req_unmarshal __P((ENV *,
+ * PUBLIC:	 __rep_blob_chunk_req_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__rep_blob_chunk_req_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__rep_blob_chunk_req_args *argp;	/* Caller-supplied; filled in. */
+	u_int8_t *bp;
+	size_t max;	/* Bytes available at bp. */
+	u_int8_t **nextp;	/* Optional; may be NULL. */
+{
+	if (max < __REP_BLOB_CHUNK_REQ_SIZE)
+		goto too_few;
+	DB_NTOHLL_COPYIN(env, argp->blob_fid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_sid, bp);
+	DB_NTOHLL_COPYIN(env, argp->blob_id, bp);
+	DB_NTOHLL_COPYIN(env, argp->offset, bp);
+	/* Hand the current bp back to callers that asked for it. */
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __rep_blob_chunk_req message"));
+	return (EINVAL);
+}
+
diff --git a/src/rep/rep_backup.c b/src/rep/rep_backup.c
index cfde7622..14bc63bb 100644
--- a/src/rep/rep_backup.c
+++ b/src/rep/rep_backup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_am.h"
 #include "dbinc/fop.h"
@@ -26,21 +27,45 @@
  * Note that the fileinfo for the first file in the list always appears at
  * (constant) offset __REP_UPDATE_SIZE in the buffer.
  */
+#define	FILE_CTX_INMEM_ONLY	0x01
 typedef struct {
 	u_int8_t	*buf;	/* Buffer base address. */
 	u_int32_t	size;	/* Total allocated buffer size. */
 	u_int8_t	*fillptr; /* Pointer to first unused space. */
 	u_int32_t	count;	/* Number of entries currently in list. */
 	u_int32_t	version; /* Rep version of marshaled format. */
+	u_int32_t	flags;	/* Context flags. */
 } FILE_LIST_CTX;
 #define	FIRST_FILE_PTR(buf)	((buf) + __REP_UPDATE_SIZE)
 
 /*
+ * Flags used to show the state of blob files on the master in messages
+ * sent to the client.
+ */
+#define	BLOB_DONE           0x01
+#define	BLOB_DELETE         0x02
+#define	BLOB_CHUNK_FAIL     0x04
+
+#define	BLOB_ID_SIZE	    sizeof(db_seq_t)
+#define	BLOB_KEY_SIZE	    (2 * BLOB_ID_SIZE)
+
+/*
  * Function that performs any desired processing on a single file, as part of
  * the traversal of a list of database files, such as with internal init.
  */
 typedef int (FILE_WALK_FN) __P((ENV *, __rep_fileinfo_args *, void *));
 
+static int __rep_add_files_to_list __P((
+    ENV *, const char *, const char *, FILE_LIST_CTX *, const char **, int));
+static int __rep_blob_chunk_gap
+    __P((ENV *, int, DB_THREAD_INFO *, REP *, int *, db_seq_t, int));
+static int __rep_blob_cleanup __P((ENV *, REP *));
+static int __rep_blobdone
+    __P((ENV *, int, DB_THREAD_INFO *, REP *, db_seq_t, int));
+static int __rep_blob_find_files __P((ENV *, DB_THREAD_INFO *, const char *,
+    db_seq_t *, db_seq_t, db_seq_t, db_seq_t *, DBT *, size_t *, u_int32_t *));
+static int __rep_blob_sort_dirs __P((ENV *,
+    int (*)(const char *), char **, int, char ***, int *));
 static FILE_WALK_FN __rep_check_uid;
 static int __rep_clean_interrupted __P((ENV *));
 static FILE_WALK_FN __rep_cleanup_nimdbs;
@@ -52,6 +77,8 @@ static int __rep_get_fileinfo __P((ENV *, const char *,
     const char *, __rep_fileinfo_args *, u_int8_t *));
 static int __rep_get_file_list __P((ENV *,
     DB_FH *, u_int32_t, u_int32_t *, DBT *));
+static int __rep_init_file_list_context __P((ENV *,
+    u_int32_t, u_int32_t, int, FILE_LIST_CTX *));
 static int __rep_is_replicated_db __P((const char *, const char *));
 static int __rep_log_setup __P((ENV *,
     REP *, u_int32_t, u_int32_t, DB_LSN *));
@@ -72,9 +99,12 @@ static FILE_WALK_FN __rep_remove_file;
 static int __rep_remove_logs __P((ENV *));
 static int __rep_remove_nimdbs __P((ENV *));
 static int __rep_rollback __P((ENV *, DB_LSN *));
+static int __rep_select_blob_file __P((const char *));
+static int __rep_select_blob_sdb __P((const char *));
 static int __rep_unlink_by_list __P((ENV *, u_int32_t,
     u_int8_t *, u_int32_t, u_int32_t));
 static FILE_WALK_FN __rep_unlink_file;
+static int __rep_walk_blob_dir __P((ENV *, FILE_LIST_CTX*));
 static int __rep_walk_filelist __P((ENV *, u_int32_t, u_int8_t *,
     u_int32_t, u_int32_t, FILE_WALK_FN *, void *));
 static int __rep_walk_dir __P((ENV *, const char *, const char *,
@@ -129,14 +159,12 @@ __rep_update_req(env, rp)
 
 	dblp = env->lg_handle;
 	logc = NULL;
-	if ((ret = __os_calloc(env, 1, MEGABYTE, &context.buf)) != 0)
-		goto err_noalloc;
-	context.size = MEGABYTE;
-	context.count = 0;
-	context.version = rp->rep_version;
 
 	/* Reserve space for the update_args, and fill in file info. */
-	context.fillptr = FIRST_FILE_PTR(context.buf);
+	if ((ret = __rep_init_file_list_context(env, rp->rep_version,
+	    F_ISSET(rp, REPCTL_INMEM_ONLY) ? FILE_CTX_INMEM_ONLY : 0,
+	    1, &context)) != 0)
+		goto err_noalloc;
 	if ((ret = __rep_find_dbs(env, &context)) != 0)
 		goto err;
 
@@ -214,6 +242,472 @@ err_noalloc:
 }
 
 /*
+ * Selection callback passed to __rep_blob_sort_dirs.
+ * Returns 1 for blob file names, of the form __db.bl###, and 0 otherwise.
+ */
+static int
+__rep_select_blob_file(file)
+	const char *file;
+{
+	size_t prefix_len;
+
+	prefix_len = strlen(BLOB_FILE_PREFIX);
+	return (strncmp(BLOB_FILE_PREFIX, file, prefix_len) == 0 ? 1 : 0);
+}
+
+/*
+ * Selection callback passed to __rep_blob_sort_dirs.
+ * Returns 1 for blob subdatabase directories, of the form __db###.
+ */
+static int
+__rep_select_blob_sdb(file)
+	const char *file;
+{
+	/* Skip names lacking the directory prefix, blob files, the meta db. */
+	if (strncmp(BLOB_DIR_PREFIX, file, strlen(BLOB_DIR_PREFIX)) != 0)
+		return (0);
+	if (strncmp(BLOB_FILE_PREFIX, file, strlen(BLOB_FILE_PREFIX)) == 0)
+		return (0);
+	return (strcmp(BLOB_META_FILE_NAME, file) != 0 ? 1 : 0);
+}
+
+/*
+ * __rep_blob_sort_dirs
+ *	Build a strcmp-ordered list of the names that select_fn accepts.  The
+ *	list shares its strings with dirs; callers free the two separately.
+ */
+static int
+__rep_blob_sort_dirs(env, select_fn, dirs, dirs_cnt, sorted, sorted_cnt)
+	ENV *env;
+	int (*select_fn) __P((const char *));
+	char **dirs;
+	int dirs_cnt;
+	char ***sorted;
+	int *sorted_cnt;
+{
+	char **list, *hold;
+	int idx, last, moved, nsel, ret;
+
+	*sorted = NULL;
+	*sorted_cnt = 0;
+
+	if ((ret = __os_malloc(env,
+	    (sizeof(char *) * (unsigned int)dirs_cnt), &list)) != 0)
+		return (ret);
+
+	/*
+	 * Keep only the entries that the selection function accepts.
+	 */
+	nsel = 0;
+	for (idx = 0; idx < dirs_cnt; idx++) {
+		if (!select_fn(dirs[idx]))
+			continue;
+		list[nsel] = dirs[idx];
+		nsel++;
+	}
+
+	/*
+	 * Directories are usually returned in order, or close to it, so use
+	 * a Bubble Sort that stops after the first pass without a swap.
+	 */
+	for (last = nsel, moved = 1; moved == 1 && last > 1; last--) {
+		moved = 0;
+		for (idx = 0; (idx + 1) < last; idx++) {
+			if (strcmp(list[idx], list[idx + 1]) <= 0)
+				continue;
+			hold = list[idx];
+			list[idx] = list[idx + 1];
+			list[idx + 1] = hold;
+			moved = 1;
+		}
+	}
+
+	*sorted = list;
+	*sorted_cnt = nsel;
+
+	return (0);
+}
+
+#define	BLOB_THROTTLE_DEFAULT	(10 * MEGABYTE)
+
+/*
+ * __rep_blob_update_req
+ *	Answer a BLOB_UPDATE_REQ by broadcasting a REP_BLOB_UPDATE that lists
+ *	the blob files after the requested sub-database id and blob id.
+ *
+ * PUBLIC: int __rep_blob_update_req __P((ENV *, DB_THREAD_INFO *, DBT *));
+ */
+int
+__rep_blob_update_req(env, ip, rec)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	DBT *rec;
+{
+	DBT rbudbt;
+	REP *rep;
+	__rep_blob_update_args rbu;
+	__rep_blob_update_req_args rbur;
+	db_seq_t blob_fid, blob_id, blob_sdb, tmp;
+	int cur, dirs_cnt, ret, sdb_cnt;
+	size_t sent;
+	char *blob_sub_dir, *dir, **dirs, **sdb;
+	u_int32_t num_blobs, throttle;
+	u_int8_t *ptr;
+
+	memset(&rbu, 0, sizeof(__rep_blob_update_args));
+	memset(&rbudbt, 0, sizeof(DBT));
+	blob_sub_dir  = dir = NULL;
+	dirs = sdb = NULL;
+	sent = 0;
+	num_blobs = 0;
+	cur = dirs_cnt = sdb_cnt = 0;
+	rep = env->rep_handle->region;
+	throttle = rep->gbytes * GIGABYTE + rep->bytes;
+	if (throttle == 0)
+		throttle = BLOB_THROTTLE_DEFAULT;
+
+	if ((ret = __rep_blob_update_req_unmarshal(
+	    env, &rbur, rec->data, rec->size, &ptr)) != 0)
+		goto err;
+
+	RPRINT(env, (env, DB_VERB_REP_SYNC,
+"blob_update_req: file_id %llu sdb_id %llu blob_id %llu highest %llu",
+	    (long long)rbur.blob_fid, (long long)rbur.blob_sid,
+	    (long long)rbur.blob_id, (long long)rbur.highest_id));
+
+	rbu.blob_fid = rbur.blob_fid;
+	/* Reserve room for the update header ahead of the file entries. */
+	if ((ret = __os_malloc(env, MEGABYTE, &rbudbt.data)) != 0)
+		goto err;
+	rbudbt.ulen = MEGABYTE;
+	rbudbt.size = __REP_BLOB_UPDATE_SIZE;
+
+	blob_fid = (db_seq_t)rbur.blob_fid;
+	blob_sdb = (db_seq_t)rbur.blob_sid;
+	blob_id = (db_seq_t)rbur.blob_id;
+
+	/* Find the first blob file if it is unknown. */
+	if (blob_id == 0 && blob_sdb == 0) {
+find_sdb:	if (dirs == NULL) {
+			if ((ret = __blob_make_sub_dir(
+			    env, &blob_sub_dir, blob_fid, 0)) != 0)
+				goto err;
+			if ((ret = __db_appname(
+			    env, DB_APP_BLOB, blob_sub_dir, NULL, &dir)) != 0)
+				goto err;
+			/* If no directory, there are no blobs to send. */
+			if (__os_exists(env, dir, NULL) != 0)
+				goto filedone;
+
+			if ((ret = __os_dirlist(
+			    env, dir, 1, &dirs, &dirs_cnt)) != 0)
+				goto err;
+
+			if (dirs_cnt == 0)
+				goto filedone;
+
+			if ((ret = __rep_blob_sort_dirs(
+			    env, __rep_select_blob_sdb,
+			    dirs, dirs_cnt, &sdb, &sdb_cnt)) != 0)
+				goto err;
+		}
+		/*
+		 * Iterate through the list of subdirectories, until we find
+		 * one that has an id larger than the current subdirectory id.
+		 */
+		while (cur < sdb_cnt) {
+			if ((ret = __blob_path_to_dir_ids(
+			    env, sdb[cur], &tmp, NULL)) != 0)
+				goto err;
+			if (blob_sdb < tmp) {
+				blob_sdb = tmp;
+				break;
+			}
+			cur++;
+		}
+		/* Check if there are no more subdirectories to search. */
+		if (sdb_cnt != 0 && cur == sdb_cnt)
+			goto filedone;
+		if (dir != NULL)
+			__os_free(env, dir);
+		dir = NULL;
+		if (blob_sub_dir != NULL)
+			__os_free(env, blob_sub_dir);
+		blob_sub_dir = NULL;
+	}
+
+	if (blob_sub_dir == NULL && (ret =
+	    __blob_make_sub_dir(env, &blob_sub_dir, blob_fid, blob_sdb)) != 0)
+		goto err;
+
+	if (dir == NULL && (ret = __db_appname(
+	    env, DB_APP_BLOB, blob_sub_dir, NULL, &dir)) != 0)
+		goto err;
+	/* Search the current directory for blob files with id > blob_id. */
+	if ((ret = __rep_blob_find_files(
+	    env, ip, dir, &blob_id, blob_sdb, blob_fid,
+	    (db_seq_t *)&rbur.highest_id, &rbudbt, &sent, &num_blobs)) != 0)
+		goto err;
+
+	/*
+	 * If we have not reached the send limit, and there are still
+	 * directories to search, then search the next directory.
+	 */
+	if (sent <  throttle) {
+		if (blob_sdb != 0) {
+			rbur.highest_id = 0;
+			blob_id = 0;
+			__os_free(env, blob_sub_dir);
+			blob_sub_dir = NULL;
+			__os_free(env, dir);
+			dir = NULL;
+			goto find_sdb;
+		} else {
+			/* Mark as the end of the files. */
+filedone:		F_SET(&rbu, BLOB_DONE);
+			rbur.highest_id = 0;
+		}
+	} else
+		STAT(rep->stat.st_nthrottles++);
+	/* Marshal the header into the room reserved at the buffer start. */
+	rbu.num_blobs = num_blobs;
+	rbu.highest_id = rbur.highest_id;
+	__rep_blob_update_marshal(env, &rbu, rbudbt.data);
+	RPRINT(env, (env, DB_VERB_REP_SYNC,
+	    "Sending blob_update: file_id %llu, num_blobs %lu, flags %lu",
+	    (long long)rbu.blob_fid,
+	    (long)num_blobs, (unsigned long)rbu.flags));
+	(void)__rep_send_message(
+	    env, DB_EID_BROADCAST, REP_BLOB_UPDATE, NULL, &rbudbt, 0, 0);
+
+err:	if (sdb != NULL)
+		__os_free(env, sdb);
+	if (dirs != NULL)
+		__os_dirfree(env, dirs, dirs_cnt);
+	if (dir != NULL)
+		__os_free(env, dir);
+	if (blob_sub_dir != NULL)
+		__os_free(env, blob_sub_dir);
+	if (rbudbt.data != NULL)
+		__os_free(env, rbudbt.data);
+	return (ret);
+}
+
+/*
+ * __rep_blob_find_files
+ *
+ * Search a directory for blob files, starting with the given blob id and
+ * sub-database id.  Add information for each blob to the message buffer until
+ * there are no more files, or it has reached the maximum send amount in terms
+ * of combined blob files size.
+ *
+ * This search is complicated because the blobs have to be sent in order by
+ * id, but ids can be sparse, so probing for each id in turn would take too
+ * long.  Instead the directory hierarchy is walked in order: every file in a
+ * directory is read, sorted by id, and added to the update list.
+ *
+ * For each file added to buf, *num is incremented and *sent is advanced.
+ */
+static int
+__rep_blob_find_files(
+    env, ip, dir, blob_id, blob_sid, blob_fid, highest, buf, sent, num)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	const char *dir;
+	db_seq_t *blob_id;
+	db_seq_t blob_sid;
+	db_seq_t blob_fid;
+	db_seq_t *highest;
+	DBT *buf;
+	size_t *sent;
+	u_int32_t *num;
+{
+	DB *bmd;
+	DB_FH *fhp;
+	DB_TXN *txn;
+	REP *rep;
+	__rep_blob_file_args rbf;
+	char blob_path[MAX_BLOB_PATH_SZ], **dirs, **files, *path, *ptr;
+	db_seq_t tmp;
+	int blob_path_len, cur, depth, dirs_cnt, files_cnt, ret;
+	off_t blob_size;
+	size_t len;
+	u_int32_t bytes, mbytes, throttle;
+
+	bmd = NULL;
+	txn = NULL;
+	fhp = NULL;
+	path = NULL;
+	dirs = files = NULL;
+	dirs_cnt = files_cnt = 0;
+	rbf.blob_sid = (u_int64_t)blob_sid;
+	rep = env->rep_handle->region;
+	throttle = rep->gbytes * GIGABYTE + rep->bytes;
+	if (throttle == 0)
+		throttle = BLOB_THROTTLE_DEFAULT;
+
+	if ((ret = __os_malloc(
+	    env, strlen(dir) + MAX_BLOB_PATH_SZ, &path)) != 0)
+		goto err;
+
+	/*
+	 * Read the highest possible blob id from the blob meta database, so
+	 * we know when to stop looking for files for this database.  The
+	 * highest value is reset every time we switch to a new subdatabase.
+	 */
+	if (*highest == 0) {
+		if ((ret = __db_create_internal(&bmd, env, 0)) != 0)
+			goto err;
+		if ((ret = __txn_begin(
+		    env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0)
+			goto err;
+
+		bmd->blob_file_id = blob_fid;
+		bmd->blob_sdb_id = blob_sid;
+		if ((ret = __blob_highest_id(bmd, txn, highest)) != 0)
+			goto err;
+		/* Clear each handle first so the err path cannot reuse it. */
+		ret = __txn_abort(txn);
+		txn = NULL;
+		if (ret != 0)
+			goto err;
+		ret = __db_close(bmd, NULL, 0);
+		bmd = NULL;
+		if (ret != 0)
+			goto err;
+		(*highest)++;
+	}
+
+	(*blob_id)++;
+	while (*sent < throttle && *blob_id < *highest) {
+		memset(blob_path, 0, MAX_BLOB_PATH_SZ);
+		blob_path_len = depth = 0;
+
+		/* Calculate the subdirectory from the blob id. */
+		__blob_calculate_dirs(
+		    *blob_id, blob_path, &blob_path_len, &depth);
+		if (blob_path_len != 0) {
+			(void)sprintf(path, "%s%c%s%c",
+			dir, PATH_SEPARATOR[0], blob_path, PATH_SEPARATOR[0]);
+		} else
+			(void)sprintf(path, "%s", dir);
+		len = strlen(path);
+
+		/* If the sub-directory does not exist, look for the next. */
+		if (__os_exists(env, path, NULL) != 0) {
+			(*blob_id) +=
+			    BLOB_DIR_ELEMS - (*blob_id % BLOB_DIR_ELEMS);
+			continue;
+		}
+
+		/* Get a list of all the blob files, sorted by id. */
+		if ((ret = __os_dirlist(env, path, 0, &dirs, &dirs_cnt)) != 0)
+			goto err;
+
+		if ((ret = __rep_blob_sort_dirs(env, __rep_select_blob_file,
+		    dirs, dirs_cnt, &files, &files_cnt)) != 0)
+			goto err;
+
+		/*
+		 * Find the first blob file with an id greater than or equal to
+		 * the last id.
+		 */
+		for (cur = 0; cur < files_cnt; cur++) {
+			ptr = files[cur];
+			ptr += strlen(BLOB_FILE_PREFIX);
+			if ((ret = __blob_str_to_id(
+			    env, (const char **)&ptr, &tmp)) != 0)
+				goto err;
+			DB_ASSERT(env, tmp != 0);
+			if (tmp >= *blob_id)
+				break;
+		}
+
+		/* Add each remaining blob file to the message buffer. */
+		while (cur < files_cnt) {
+			/* Get the blob id from the current file name. */
+			(void)sprintf(path + len, "%s", files[cur]);
+			ptr = path + len + strlen(BLOB_FILE_PREFIX);
+			if ((ret = __blob_str_to_id(
+			    env, (const char **)&ptr, blob_id)) != 0)
+				goto err;
+			rbf.blob_id = (u_int64_t)*blob_id;
+			/* Open the file and get its size. */
+			if ((ret = __os_open(
+			    env, path, 0, DB_OSO_RDONLY, 0, &fhp)) != 0) {
+				if (ret == ENOENT) {
+					ret = 0;
+					RPRINT(env, (env, DB_VERB_REP_SYNC,
+			"blob_update blob file: %llu deleted, skipping.",
+					    (unsigned long long)rbf.blob_id));
+					cur++;
+					continue;
+				}
+				goto err;
+			}
+			if ((ret = __os_ioinfo(
+			    env, path, fhp, &mbytes, &bytes, NULL)) != 0)
+				goto err;
+			ret = __os_closehandle(env, fhp);
+			fhp = NULL;
+			if (ret != 0)
+				goto err;
+			blob_size = ((off_t)mbytes * (off_t)MEGABYTE) + bytes;
+			rbf.blob_size = (u_int64_t)blob_size;
+			if (blob_size > UINT32_MAX ||
+			    ((*sent) + (size_t)blob_size) < (*sent))
+				(*sent) = throttle + 1;
+			else
+				(*sent) += (size_t)blob_size;
+			__rep_blob_file_marshal(
+			    env, &rbf, (u_int8_t *)buf->data + buf->size);
+			(*num)++;
+			buf->size += __REP_BLOB_FILE_SIZE;
+			RPRINT(env, (env, DB_VERB_REP_SYNC,
+	"blob_update adding: blob_sid %llu, blob_id %llu blob_size %llu",
+			    (unsigned long long)rbf.blob_sid,
+			    (unsigned long long)rbf.blob_id,
+			    (unsigned long long)rbf.blob_size));
+			if ((*sent) > throttle)
+				goto err;
+
+			/* Resize if there is not enough space to grow. */
+			if (buf->size > (buf->ulen - __REP_BLOB_FILE_SIZE)) {
+				if ((ret = __os_realloc(
+				    env, buf->ulen * 2, &buf->data)) != 0)
+					goto err;
+				buf->ulen *= 2;
+			}
+			cur++;
+		}
+		/*
+		 * Move to the next directory of blob files by setting the blob
+		 * id to the next largest possible value.
+		 */
+		(*blob_id) += BLOB_DIR_ELEMS - (*blob_id % BLOB_DIR_ELEMS);
+		__os_free(env, files);
+		files = NULL;
+		__os_dirfree(env, dirs, dirs_cnt);
+		dirs = NULL;
+	}
+err:
+	if (path != NULL)
+		__os_free(env, path);
+	if (files != NULL)
+		__os_free(env, files);
+	if (dirs != NULL)
+		__os_dirfree(env, dirs, dirs_cnt);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	if (txn != NULL)
+		(void)__txn_abort(txn);
+	if (bmd != NULL)
+		(void)__db_close(bmd, NULL, 0);
+
+	return (ret);
+}
+
+/*
  * __rep_find_dbs -
  *	Walk through all the named files/databases including those in the
  *	environment or data_dirs and those that in named and in-memory.  We
@@ -240,7 +734,8 @@ __rep_find_dbs(env, context)
 	 * replicated user databases.  If the application has a metadata_dir,
 	 * this will also find any persistent internal system databases.
 	 */
-	if (dbenv->db_data_dir != NULL) {
+	if (!F_ISSET(context, FILE_CTX_INMEM_ONLY) &&
+	    dbenv->db_data_dir != NULL) {
 		for (ddir = dbenv->db_data_dir; *ddir != NULL; ++ddir) {
 			if ((ret = __db_appname(env,
 			    DB_APP_NONE, *ddir, NULL, &real_dir)) != 0)
@@ -252,16 +747,24 @@ __rep_find_dbs(env, context)
 			real_dir = NULL;
 		}
 	}
+
 	/*
 	 * Walk the environment directory.  If the application doesn't
 	 * have a metadata_dir, this will return persistent internal system
 	 * databases.  If the application doesn't have a separate data
 	 * directory, this will also return all user databases.
 	 */
-	if (ret == 0)
+	if (!F_ISSET(context, FILE_CTX_INMEM_ONLY) && ret == 0)
 		ret = __rep_walk_dir(env, env->db_home, NULL, context);
 
-	/* Now, collect any in-memory named databases. */
+	/* Gather the databases in the blob directory. */
+	if (!F_ISSET(context, FILE_CTX_INMEM_ONLY) && ret == 0)
+		ret = __rep_walk_blob_dir(env, context);
+
+	/*
+	 * Now, collect any in-memory named databases.  We do this no
+	 * matter if the INMEM_ONLY flag is set or not.
+	 */
 	if (ret == 0)
 		ret = __rep_walk_dir(env, NULL, NULL, context);
 
@@ -271,6 +774,148 @@ __rep_find_dbs(env, context)
 }
 
 /*
+ * __rep_walk_blob_dir --
+ *
+ * The blob directory hierarchy consists of a top layer that contains the
+ * blob meta database (BMD) and a set of blob directories (BLDIR).
+ * Each BLDIR corresponds to a database file.  If the database file doesn't
+ * contain subdatabases, the BLDIR contains a BMD and blob files.  If the
+ * database file contains subdatabases, the BLDIR contains a BLSDIR
+ * subdirectory for each subdatabase.  Each BLSDIR contains a BMD and blob
+ * files.
+ *
+ * This function records every BMD found in the file list context.  It
+ * returns 0 without searching if the top level BMD does not exist, and
+ * otherwise searches the first and second layers of the hierarchy.
+ */
+static int
+__rep_walk_blob_dir(env, context)
+	ENV *env;
+	FILE_LIST_CTX *context;
+{
+	int cnt, cnt2, i, j, ret;
+	size_t len;
+	char *blob_dir, *blob_sub, **dirs, *name, *name2, **subdirs;
+	char blob_sub_buf[MAX_BLOB_PATH_SZ];
+	const char *bmd, *dirp;
+
+	cnt = cnt2 = 0;
+	blob_dir = name = name2 = NULL;
+	dirs = subdirs = NULL;
+	bmd = BLOB_META_FILE_NAME;
+	blob_sub = blob_sub_buf;
+
+	if ((ret = __db_appname(
+	    env, DB_APP_BLOB, BLOB_META_FILE_NAME, &dirp, &name)) != 0)
+		goto err;
+
+	/*
+	 * If the main blob meta database does not exist, then no database in
+	 * the environment supports blobs.
+	 */
+	if ((ret = __os_exists(env, name, NULL)) != 0) {
+		ret = 0;
+		goto err;
+	}
+
+	/* Get the blob directory. */
+	if ((ret = __db_appname(
+	    env, DB_APP_BLOB, NULL, &dirp, &blob_dir)) != 0)
+		goto err;
+
+	if ((ret = __rep_add_files_to_list(
+	    env, blob_dir, NULL, context, &bmd, 1)) != 0)
+		goto err;
+
+	if ((ret = __os_dirlist(env, blob_dir, 1, &dirs, &cnt)) != 0)
+		goto err;
+
+	__os_free(env, name);
+	name = NULL;
+	if ((ret = __os_malloc(
+	    env, MAX_BLOB_PATH_SZ + strlen(blob_dir), &name)) != 0)
+		goto err;
+
+	for (i = 0; i < cnt; i++) {
+		/*
+		 * Skip blob files and the top level BMD
+		 * (which was handled above).
+		 */
+		if (IS_BLOB_META(dirs[i]) || IS_BLOB_FILE(dirs[i]))
+			continue;
+		len = strlen(blob_dir) +
+		    strlen(dirs[i]) + strlen(BLOB_META_FILE_NAME) + 3;
+		(void)snprintf(name, len, "%s%c%s%c%s", blob_dir,
+		    PATH_SEPARATOR[0], dirs[i], PATH_SEPARATOR[0],
+		    BLOB_META_FILE_NAME);
+		/*
+		 * If a blob meta database exists, add it to the list, and move
+		 * on to the next directory, otherwise get a directory list and
+		 * check the second layer for BMD.  If a directory contains a
+		 * BMD, then it cannot contain subdirectories with BMD.
+		 */
+		if (__os_exists(env, name, NULL) == 0) {
+			(void)snprintf(blob_sub,
+			    strlen(dirs[i]) + strlen(bmd) + 2,
+			    "%s%c%s", dirs[i], PATH_SEPARATOR[0], bmd);
+			if ((ret = __rep_add_files_to_list(env, blob_dir,
+			    NULL, context, (const char **)&blob_sub, 1)) != 0)
+				goto err;
+		} else {
+			len = strlen(blob_dir) + strlen(dirs[i]) + 2;
+			(void)snprintf(name, len, "%s%c%s",
+			    blob_dir, PATH_SEPARATOR[0], dirs[i]);
+			if ((ret = __os_dirlist(
+			    env, name, 1, &subdirs, &cnt2)) != 0)
+				goto err;
+			if (name2 == NULL) {
+				if ((ret = __os_malloc(env,
+				    MAX_BLOB_PATH_SZ + strlen(name),
+				    &name2)) != 0)
+					goto err;
+			}
+			for (j = 0; j < cnt2; j++) {
+				if (IS_BLOB_FILE(subdirs[j]))
+					continue;
+				len = strlen(name) + strlen(subdirs[j])
+				    + strlen(BLOB_META_FILE_NAME) + 3;
+				(void)snprintf(name2, len, "%s%c%s%c%s",
+				    name, PATH_SEPARATOR[0], subdirs[j],
+				    PATH_SEPARATOR[0], BLOB_META_FILE_NAME);
+				/* A missing BMD is not an error here. */
+				if (__os_exists(env, name2, NULL) == 0) {
+					len = strlen(dirs[i])
+					    + strlen(subdirs[j])
+					    + strlen(bmd) + 3;
+					(void)snprintf(blob_sub,
+					    len, "%s%c%s%c%s", dirs[i],
+					    PATH_SEPARATOR[0], subdirs[j],
+					    PATH_SEPARATOR[0], bmd);
+					if ((ret = __rep_add_files_to_list(
+					    env, blob_dir, NULL, context,
+					    (const char **)&blob_sub, 1)) != 0)
+						goto err;
+				}
+			}
+			__os_dirfree(env, subdirs, cnt2);
+			subdirs = NULL;
+		}
+	}
+
+err:	if (name != NULL)
+		__os_free(env, name);
+	if (name2 != NULL)
+		__os_free(env, name2);
+	if (blob_dir != NULL)
+		__os_free(env, blob_dir);
+	if (dirs != NULL)
+		__os_dirfree(env, dirs, cnt);
+	if (subdirs != NULL)
+		__os_dirfree(env, subdirs, cnt2);
+	return (ret);
+}
+
+/*
  * __rep_walk_dir --
  *
  * This is the routine that walks a directory and fills in the structures
@@ -284,11 +929,8 @@ __rep_walk_dir(env, dir, datadir, context)
 	const char *dir, *datadir;
 	FILE_LIST_CTX *context;
 {
-	__rep_fileinfo_args tmpfp;
-	size_t avail, len;
-	int cnt, first_file, i, ret;
-	u_int8_t uid[DB_FILE_ID_LEN];
-	char *file, **names, *subdb;
+	int cnt, ret;
+	char **names;
 
 	if (dir == NULL) {
 		VPRINT(env, (env, DB_VERB_REP_SYNC,
@@ -304,7 +946,34 @@ __rep_walk_dir(env, dir, datadir, context)
 	}
 	VPRINT(env, (env, DB_VERB_REP_SYNC, "Walk_dir: Dir %s has %d files",
 	    (dir == NULL) ? "INMEM" : dir, cnt));
+	ret = __rep_add_files_to_list(
+	    env, dir, datadir, context, (const char **)names, cnt);
+
+	__os_dirfree(env, names, cnt);
+	return (ret);
+}
+
+/*
+ * __rep_add_files_to_list --
+ *
+ * Add the given files to the file list.
+ */
+static int
+__rep_add_files_to_list(env, dir, datadir, context, names, cnt)
+	ENV *env;
+	const char *dir, *datadir;
+	FILE_LIST_CTX *context;
+	const char **names;
+	int cnt;
+{
+	__rep_fileinfo_args tmpfp;
+	size_t avail, len;
+	int first_file, i, ret;
+	u_int8_t uid[DB_FILE_ID_LEN];
+	const char *file, *subdb;
+
 	first_file = 1;
+	ret = 0;
 	for (i = 0; i < cnt; i++) {
 		VPRINT(env, (env, DB_VERB_REP_SYNC,
 		    "Walk_dir: File %d name: %s", i, names[i]));
@@ -372,15 +1041,19 @@ __rep_walk_dir(env, dir, datadir, context)
 		DB_SET_DBT(tmpfp.uid, uid, DB_FILE_ID_LEN);
 retry:		avail = (size_t)(&context->buf[context->size] -
 		    context->fillptr);
+		/*
+		 * It is safe to cast to the old structs
+		 * because the first part of the current
+		 * struct matches the old structs.
+		 */
 		if (context->version < DB_REPVERSION_53)
-			/*
-			 * It is safe to cast to the old struct
-			 * because the first part of the current
-			 * struct matches the old struct.
-			 */
 			ret = __rep_fileinfo_v6_marshal(env, context->version,
 			    (__rep_fileinfo_v6_args *)&tmpfp,
 			    context->fillptr, avail, &len);
+		else if (context->version < DB_REPVERSION_61)
+			ret = __rep_fileinfo_v7_marshal(env, context->version,
+			    (__rep_fileinfo_v7_args *)&tmpfp,
+			    context->fillptr, avail, &len);
 		else
 			ret = __rep_fileinfo_marshal(env, context->version,
 			    &tmpfp, context->fillptr, avail, &len);
@@ -409,9 +1082,7 @@ retry:		avail = (size_t)(&context->buf[context->size] -
 		 */
 		context->fillptr += len;
 	}
-err:
-	__os_dirfree(env, names, cnt);
-	return (ret);
+err:	return (ret);
 }
 
 /*
@@ -430,7 +1101,7 @@ __rep_is_replicated_db(name, dir)
 	/*
 	 * Remaining things that don't have a "__db" prefix are eligible.
 	 */
-	if (!IS_DB_FILE(name))
+	if (!IS_DB_FILE(name) || IS_BLOB_META(name))
 		return (1);
 
 	/* Here, we know we have a "__db" name. */
@@ -470,7 +1141,7 @@ __rep_check_uid(env, rfp, uid)
 	if (memcmp(rfp->uid.data, uid, DB_FILE_ID_LEN) == 0) {
 		VPRINT(env, (env, DB_VERB_REP_SYNC,
 			"Check_uid: Found matching file."));
-		ret = DB_KEYEXIST;
+		ret = USR_ERR(env, DB_KEYEXIST);
 	}
 	return (ret);
 
@@ -489,6 +1160,7 @@ __rep_get_fileinfo(env, file, subdb, rfp, uid)
 	DB_THREAD_INFO *ip;
 	PAGE *pagep;
 	int lorder, ret, t_ret;
+	u_int32_t flags;
 
 	dbp = NULL;
 	dbc = NULL;
@@ -503,11 +1175,15 @@ __rep_get_fileinfo(env, file, subdb, rfp, uid)
 	 * database handles would block the master from handling UPDATE_REQ.
 	 */
 	F_SET(dbp, DB_AM_RECOVER);
-	if ((ret = __db_open(dbp, ip, NULL, file, subdb, DB_UNKNOWN,
-	    DB_RDONLY | (F_ISSET(env, ENV_THREAD) ? DB_THREAD : 0),
-	    0, PGNO_BASE_MD)) != 0)
+	flags = DB_RDONLY | (F_ISSET(env, ENV_THREAD) ? DB_THREAD : 0);
+	if (file != NULL && IS_BLOB_META(file))
+		LF_SET(DB_INTERNAL_BLOB_DB);
+	if ((ret = __db_open(dbp, ip, NULL,
+	    file, subdb, DB_UNKNOWN, flags, 0, PGNO_BASE_MD)) != 0)
 		goto err;
 
+	SET_LO_HI_VAR(dbp->blob_file_id, rfp->blob_fid_lo, rfp->blob_fid_hi);
+
 	if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
 		goto err;
 	if ((ret = __memp_fget(dbp->mpf, &dbp->meta_pgno, ip, dbc->txn,
@@ -574,6 +1250,7 @@ __rep_page_req(env, ip, eid, rp, rec)
 {
 	__rep_fileinfo_args *msgfp, msgf;
 	__rep_fileinfo_v6_args *msgfpv6;
+	__rep_fileinfo_v7_args *msgfpv7;
 	DB_MPOOLFILE *mpf;
 	DB_REP *db_rep;
 	REP *rep;
@@ -584,21 +1261,30 @@ __rep_page_req(env, ip, eid, rp, rec)
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
 
+	/*
+	 * Build a current struct by copying in the older
+	 * version struct and then setting up the new fields.
+	 * This is safe because all old fields are in the
+	 * same location in the current struct.
+	 */
 	if (rp->rep_version < DB_REPVERSION_53) {
-		/*
-		 * Build a current struct by copying in the older
-		 * version struct and then setting up the data_dir.
-		 * This is safe because all old fields are in the
-		 * same location in the current struct.
-		 */
 		if ((ret = __rep_fileinfo_v6_unmarshal(env, rp->rep_version,
 		    &msgfpv6, rec->data, rec->size, &next)) != 0)
 			return (ret);
 		memcpy(&msgf, msgfpv6, sizeof(__rep_fileinfo_v6_args));
 		msgf.dir.data = NULL;
 		msgf.dir.size = 0;
+		msgf.blob_fid_lo = msgf.blob_fid_hi = 0;
 		msgfp = &msgf;
 		msgfree = msgfpv6;
+	} else if (rp->rep_version < DB_REPVERSION_61) {
+		if ((ret = __rep_fileinfo_v7_unmarshal(env, rp->rep_version,
+		    &msgfpv7, rec->data, rec->size, &next)) != 0)
+			return (ret);
+		memcpy(&msgf, msgfpv7, sizeof(__rep_fileinfo_v7_args));
+		msgf.blob_fid_lo = msgf.blob_fid_hi = 0;
+		msgfp = &msgf;
+		msgfree = msgfpv7;
 	} else {
 		if ((ret = __rep_fileinfo_unmarshal(env, rp->rep_version,
 		    &msgfp, rec->data, rec->size, &next)) != 0)
@@ -624,7 +1310,7 @@ __rep_page_req(env, ip, eid, rp, rec)
 			(void)__rep_send_message(env, eid, REP_FILE_FAIL,
 			    NULL, rec, 0, 0);
 		else
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 		goto err;
 	}
 
@@ -738,7 +1424,7 @@ __rep_page_sendpages(env, ip, eid, rp, msgfp, mpf, dbp)
 #ifdef HAVE_QUEUE
 			if ((ret = __qam_fget(qdbc, &p, 0, &pagep)) == ENOENT)
 #endif
-				ret = DB_PAGE_NOTFOUND;
+				ret = USR_ERR(env, DB_PAGE_NOTFOUND);
 		} else
 			ret = __memp_fget(mpf, &p, ip, NULL, 0, &pagep);
 		msgfp->pgno = p;
@@ -748,16 +1434,21 @@ __rep_page_sendpages(env, ip, eid, rp, msgfp, mpf, dbp)
 				RPRINT(env, (env, DB_VERB_REP_SYNC,
 				    "sendpages: PAGE_FAIL on page %lu",
 				    (u_long)p));
+				/*
+				 * It is safe to cast to the old structs
+				 * because the first part of the current
+				 * struct matches the old structs.
+				 */
 				if (rp->rep_version < DB_REPVERSION_53)
-					/*
-					 * It is safe to cast to the old struct
-					 * because the first part of the current
-					 * struct matches the old struct.
-					 */
 					ret = __rep_fileinfo_v6_marshal(env,
 					    rp->rep_version,
 					    (__rep_fileinfo_v6_args *)msgfp,
 					    buf, msgsz, &len);
+				else if (rp->rep_version < DB_REPVERSION_61)
+					ret = __rep_fileinfo_v7_marshal(env,
+					    rp->rep_version,
+					    (__rep_fileinfo_v7_args *)msgfp,
+					    buf, msgsz, &len);
 				else
 					ret = __rep_fileinfo_marshal(env,
 					    rp->rep_version, msgfp, buf,
@@ -772,7 +1463,7 @@ __rep_page_sendpages(env, ip, eid, rp, msgfp, mpf, dbp)
 				    REP_PAGE_FAIL, &lsn, &msgdbt, 0, 0);
 				continue;
 			} else
-				ret = DB_NOTFOUND;
+				ret = USR_ERR(env, DB_NOTFOUND);
 			goto err;
 		} else if (ret != 0)
 			goto err;
@@ -796,16 +1487,21 @@ __rep_page_sendpages(env, ip, eid, rp, msgfp, mpf, dbp)
 		RPRINT(env, (env, DB_VERB_REP_SYNC,
 		    "sendpages: %lu, page lsn [%lu][%lu]", (u_long)p,
 		    (u_long)pagep->lsn.file, (u_long)pagep->lsn.offset));
+		/*
+		 * It is safe to cast to the old structs
+		 * because the first part of the current
+		 * struct matches the old structs.
+		 */
 		if (rp->rep_version < DB_REPVERSION_53)
-			/*
-			 * It is safe to cast to the old struct
-			 * because the first part of the current
-			 * struct matches the old struct.
-			 */
 			ret = __rep_fileinfo_v6_marshal(env,
 			    rp->rep_version,
 			    (__rep_fileinfo_v6_args *)msgfp,
 			    buf, msgsz, &len);
+		else if (rp->rep_version < DB_REPVERSION_61)
+			ret = __rep_fileinfo_v7_marshal(env,
+			    rp->rep_version,
+			    (__rep_fileinfo_v7_args *)msgfp,
+			    buf, msgsz, &len);
 		else
 			ret = __rep_fileinfo_marshal(env, rp->rep_version,
 			    msgfp, buf, msgsz, &len);
@@ -1010,7 +1706,8 @@ __rep_update_setup(env, eid, rp, rec, savetime, lsn)
 	ZERO_LSN(lp->waiting_lsn);
 	ZERO_LSN(lp->max_wait_lsn);
 	ZERO_LSN(lp->max_perm_lsn);
-	if (db_rep->rep_db == NULL)
+	ret = __rep_blob_cleanup(env, rep);
+	if (ret == 0 && db_rep->rep_db == NULL)
 		ret = __rep_client_dbinit(env, 0, REP_DB);
 	MUTEX_UNLOCK(env, rep->mtx_clientdb);
 	if (ret != 0)
@@ -1148,6 +1845,337 @@ err:	/*
 	return (ret);
 }
 
+/*
+ * __rep_blob_update
+ *	Handle a REP_BLOB_UPDATE message: add a blob gap database entry for
+ *	each megabyte of every listed blob, then request the blob file data.
+ *
+ * PUBLIC: int __rep_blob_update __P((ENV *, int, DB_THREAD_INFO *, DBT *));
+ */
+int
+__rep_blob_update(env, eid, ip, rec)
+	ENV *env;
+	int eid;
+	DB_THREAD_INFO *ip;
+	DBT *rec;
+{
+	DBC *dbc;
+	DB_REP *db_rep;
+	DBT data, key;
+	REP *rep;
+	REGINFO *infop;
+	__rep_blob_file_args rbf;
+	__rep_blob_update_args rbu;
+	__rep_fileinfo_args *rfp;
+	db_seq_t blob_fid;
+	int ret;
+	off_t offset;
+	size_t len;
+	u_int32_t num_blobs;
+	u_int8_t keybuf[BLOB_KEY_SIZE], *ptr;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	infop = env->reginfo;
+	rfp = NULL;
+	dbc = NULL;
+	memset(&rbu, 0, sizeof(__rep_blob_update_args));
+	memset(&rbf, 0, sizeof(__rep_blob_file_args));
+
+	if ((ret = __rep_blob_update_unmarshal(
+	    env, &rbu, rec->data, rec->size, &ptr)) != 0)
+		return (ret);
+	len = rec->size - __REP_BLOB_UPDATE_SIZE;
+
+	RPRINT(env, (env, DB_VERB_REP_SYNC,
+"blob_update: file_id %llu, num_blobs %lu, flags %lu, highest %llu",
+	    (long long)rbu.blob_fid, (long)rbu.num_blobs,
+	    (unsigned long)rbu.flags, (long long)rbu.highest_id));
+
+	MUTEX_LOCK(env, rep->mtx_clientdb);
+	REP_SYSTEM_LOCK(env);
+
+	/*
+	 * Check if the world changed.
+	 */
+	if (rep->sync_state != SYNC_PAGE)
+		goto unlock;
+
+	/* Make sure this is for the current database. */
+	GET_CURINFO(rep, infop, rfp);
+	GET_LO_HI(env, rfp->blob_fid_lo, rfp->blob_fid_hi, blob_fid, ret);
+	if (ret != 0)
+		goto unlock;
+
+	if (blob_fid != (db_seq_t)rbu.blob_fid)
+		goto unlock;
+
+	rep->highest_id = (db_seq_t)rbu.highest_id;
+	/*
+	 * For each blob file, add an entry to the database for each 1 MB
+	 * section of that file.  The entries will be deleted as the
+	 * corresponding blob chunks arrive and are written to disk.
+	 */
+	if (db_rep->blob_dbp == NULL &&
+	    (ret = __rep_client_dbinit(env, 0, REP_BLOB)) != 0)
+		goto unlock;
+
+	if ((ret = __db_cursor(db_rep->blob_dbp, ip, NULL, &dbc, 0)) != 0)
+		goto unlock;
+
+	/*
+	 * Make sure no one else has populated the database, this could happen
+	 * if the update message is sent twice.
+	 */
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != DB_NOTFOUND)
+		goto unlock;
+
+	/* It is possible for a blob database to have no blobs. */
+	if (rbu.num_blobs == 0) {
+		(void)__dbc_close(dbc);
+		dbc = NULL;
+		rep->blob_more_files = 0;
+		rep->gap_bl_hi_id = rep->gap_bl_hi_sid = 0;
+		rep->last_blob_id = rep->last_blob_sid = 0;
+		rep->prev_blob_id = rep->prev_blob_sid = 0;
+		rep->gap_bl_hi_off = 0;
+		rep->blob_sync = 0;
+		rep->highest_id = 0;
+		rep->blob_rereq = 0;
+		ret = __rep_blobdone(env, eid, ip, rep, blob_fid, 0);
+		goto unlock;
+	}
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	data.flags = key.flags = DB_DBT_USERMEM;
+	key.data = keybuf;
+	key.ulen = key.size = BLOB_KEY_SIZE;
+	data.data = (void *)&offset;
+	data.ulen = data.size = sizeof(offset);
+	num_blobs = 0;
+	while (num_blobs < rbu.num_blobs) {
+		if ((ret =
+		    __rep_blob_file_unmarshal(env, &rbf, ptr, len, &ptr)) != 0)
+			goto unlock;
+		len -= __REP_BLOB_FILE_SIZE;
+
+		RPRINT(env, (env, DB_VERB_REP_SYNC,
+	"blob_update adding file: blob_id %llu, sdb_id %llu, blob_size %llu",
+		    (long long)rbf.blob_id, (long long)rbf.blob_sid,
+		    (long long)rbf.blob_size));
+
+		memcpy(keybuf, &rbf.blob_sid, BLOB_ID_SIZE);
+		memcpy(&(keybuf[BLOB_ID_SIZE]), &rbf.blob_id, BLOB_ID_SIZE);
+		offset = 0;
+		/*
+		 * Add an entry for each megabyte of the blob file, comparing
+		 * in 64 bits so blobs of 4GB or more end the loop.  Zero
+		 * length blob files should have at least one entry.
+		 */
+		do {
+			if ((ret = __dbc_put(dbc, &key, &data, 0)) != 0)
+				goto unlock;
+			offset += MEGABYTE;
+			/*
+			 * Check for overflow, this can happen when the master
+			 * supports 64-bit file offsets, but the client does not.
+			 */
+			if (offset < 0) {
+				__db_errx(env,
+				    DB_STR("3704",
+					"Blob file offset overflow"));
+				ret = EINVAL;
+				goto unlock;
+			}
+		} while ((u_int64_t)offset < rbf.blob_size);
+		num_blobs++;
+	}
+	/* Set whether there are more files after the ones on the list. */
+	if (F_ISSET(&rbu, BLOB_DONE))
+		rep->blob_more_files = 0;
+	else
+		rep->blob_more_files = 1;
+	rep->prev_blob_id = rep->last_blob_id;
+	rep->prev_blob_sid = rep->last_blob_sid;
+	rep->last_blob_sid = (db_seq_t)rbf.blob_sid;
+	rep->last_blob_id = (db_seq_t)rbf.blob_id;
+
+	/*
+	 * Send the same message payload in a REP_BLOB_ALL_REQ message to get
+	 * the blob data.  Peer-to-peer initialization is not supported for
+	 * blobs, so we can only send this back to the master despite the fact
+	 * that building the list of blob files is expensive.
+	 */
+	(void)__rep_send_message(
+	    env, rep->master_id, REP_BLOB_ALL_REQ, NULL, rec, 0, 0);
+
+unlock:	REP_SYSTEM_UNLOCK(env);
+	MUTEX_UNLOCK(env, rep->mtx_clientdb);
+	if (dbc != NULL)
+		(void)__dbc_close(dbc);
+
+	return (ret);
+}
+
+/*
+ * __rep_blob_allreq
+ *	Service a blob request: send each listed blob file to eid in chunks.
+ *
+ * PUBLIC: int __rep_blob_allreq __P((ENV *, int, DBT *));
+ */
+int
+__rep_blob_allreq(env, eid, rec)
+	ENV *env;
+	int eid;
+	DBT *rec;
+{
+	DB *dbp;
+	DB_FH *fhp;
+	DBT msg;
+	__rep_blob_chunk_args rbc;
+	__rep_blob_file_args rbf;
+	__rep_blob_update_args rbu;
+	db_seq_t old_sdb_id;
+	int done, ret;
+	off_t offset;
+	size_t len;
+	u_int32_t num_blobs;
+	u_int8_t *chunk_buf, *msg_buf, *ptr;
+
+	dbp = NULL;
+	fhp = NULL;
+	chunk_buf = msg_buf = NULL;
+	memset(&rbu, 0, sizeof(__rep_blob_update_args));
+	memset(&rbc, 0, sizeof(__rep_blob_chunk_args));
+	memset(&msg, 0, sizeof(DBT));
+
+	if ((ret =
+	    __os_malloc(env, MEGABYTE + __REP_BLOB_CHUNK_SIZE, &msg_buf)) != 0)
+		goto err;
+	msg.data = msg_buf;
+	msg.ulen = MEGABYTE + __REP_BLOB_CHUNK_SIZE;
+	if ((ret = __os_malloc(env, MEGABYTE, &chunk_buf)) != 0)
+		goto err;
+	rbc.data.data = chunk_buf;
+	rbc.data.ulen = MEGABYTE;
+	rbc.data.flags = DB_DBT_USERMEM;
+
+	/*
+	 * The REP_BLOB_ALL_REQ message sends the REP_BLOB_UPDATE message
+	 * payload back to the master to request the actual blobs after the
+	 * client has prepared itself to receive them.
+	 */
+	len = rec->size;
+	if ((ret = __rep_blob_update_unmarshal(
+	    env, &rbu, rec->data, rec->size, &ptr)) != 0)
+		goto err;
+	len -= __REP_BLOB_UPDATE_SIZE;
+
+	RPRINT(env, (env, DB_VERB_REP_SYNC,
+	    "blob_all_req: file_id %llu, num_blobs %lu, flags %lu",
+	    (long long)rbu.blob_fid, (long)rbu.num_blobs,
+	    (unsigned long)rbu.flags));
+
+	if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
+		goto err;
+	dbp->blob_file_id = (db_seq_t)rbu.blob_fid;
+	rbc.blob_fid = rbu.blob_fid;
+	num_blobs = 0;
+	/*
+	 * The list of files to send is included in the message; go through
+	 * the list and send each file in pieces of at most MEGABYTE bytes.
+	 */
+	while (num_blobs < rbu.num_blobs) {
+		num_blobs++;
+		if ((ret = __rep_blob_file_unmarshal(
+		    env, &rbf, ptr, len, &ptr)) != 0)
+			goto err;
+		len -= __REP_BLOB_FILE_SIZE;
+		old_sdb_id = dbp->blob_sdb_id;
+		dbp->blob_sdb_id = (db_seq_t)rbf.blob_sid;
+		rbc.flags = 0;
+		rbc.blob_sid = rbf.blob_sid;
+		rbc.blob_id = rbf.blob_id;
+		/* Free the sub-directory information if it has changed. */
+		if (old_sdb_id != dbp->blob_sdb_id &&
+		    dbp->blob_sub_dir != NULL) {
+			__os_free(env, dbp->blob_sub_dir);
+			dbp->blob_sub_dir = NULL;
+		}
+		if (dbp->blob_sub_dir == NULL) {
+			if ((ret = __blob_make_sub_dir(env, &dbp->blob_sub_dir,
+			    dbp->blob_file_id, dbp->blob_sdb_id)) != 0)
+				goto err;
+		}
+		if ((ret = __blob_file_open(dbp,
+		    &fhp, (db_seq_t)rbf.blob_id, DB_FOP_READONLY, 0)) != 0) {
+			/*
+			 * The file may have been deleted between creating the
+			 * list and sending the data.  Send a data-less chunk
+			 * flagged BLOB_DELETE to say the file has been deleted.
+			 */
+			if (ret == ENOENT) {
+				F_SET(&rbc, BLOB_DELETE);
+				rbc.data.size = 0;
+				__rep_blob_chunk_marshal(env, &rbc, msg.data);
+				msg.size = __REP_BLOB_CHUNK_SIZE;
+				(void)__rep_send_message(env,
+				    eid, REP_BLOB_CHUNK, NULL, &msg, 0, 0);
+				ret = 0;
+				fhp = NULL;
+				continue;
+			}
+			goto err;
+		}
+		offset = 0;
+		do {
+			done = 0;
+			rbc.flags = 0;
+			if ((ret = __blob_file_read(
+			    env, fhp, &rbc.data, offset, MEGABYTE)) != 0)
+				goto err;
+			DB_ASSERT(env, rbc.data.size <= MEGABYTE);
+
+			/*
+			 * The blob file may have gotten shorter since the list
+			 * was made: flag this chunk and stop after sending it.
+			 */
+			if (rbc.data.size < (u_int32_t)MEGABYTE && (u_int64_t)
+			    (offset + rbc.data.size) < rbf.blob_size) {
+				F_SET(&rbc, BLOB_CHUNK_FAIL);
+				done = 1;
+			}
+			/* File may have grown since the list was made. */
+			if ((u_int64_t)
+			    (offset + rbc.data.size) > rbf.blob_size) {
+				rbc.data.size =
+				    (u_int32_t)((off_t)rbf.blob_size - offset);
+			}
+			rbc.offset = (u_int64_t)offset;
+			__rep_blob_chunk_marshal(env, &rbc, msg.data);
+			msg.size = __REP_BLOB_CHUNK_SIZE + rbc.data.size;
+			(void)__rep_send_message(
+			    env, eid, REP_BLOB_CHUNK, NULL, &msg, 0, 0);
+			offset += MEGABYTE;
+		} while ((u_int64_t)offset < rbf.blob_size && !done);
+
+		if (fhp != NULL && (ret = __os_closehandle(env, fhp)) != 0)
+			goto err;
+		fhp = NULL;
+	}
+err:	if (chunk_buf != NULL)
+		__os_free(env, chunk_buf);
+	if (msg_buf != NULL)
+		__os_free(env, msg_buf);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	if (dbp != 0)
+		(void)__db_close(dbp, NULL, 0);
+	return (ret);
+}
+
 static int
 __rep_find_inmem(env, rfp, unused)
 	ENV *env;
@@ -1157,6 +2185,11 @@ __rep_find_inmem(env, rfp, unused)
 	COMPQUIET(env, NULL);
 	COMPQUIET(unused, NULL);
 
+	/*
+	 * Cannot assume all databases are in-memory because abbreviated
+	 * internal inits from 5.3 and earlier are not limited to in-memory
+	 * databases.
+	 */
 	return (FLD_ISSET(rfp->db_flags, DB_AM_INMEM) ? DB_KEYEXIST : 0);
 }
 
@@ -1172,12 +2205,9 @@ __rep_remove_nimdbs(env)
 	FILE_LIST_CTX context;
 	int ret;
 
-	if ((ret = __os_calloc(env, 1, MEGABYTE, &context.buf)) != 0)
+	if ((ret = __rep_init_file_list_context(env,
+	    DB_REPVERSION, 0, 0, &context)) != 0)
 		return (ret);
-	context.size = MEGABYTE;
-	context.count = 0;
-	context.fillptr = context.buf;
-	context.version = DB_REPVERSION;
 
 	/* NB: "NULL" asks walk_dir to consider only in-memory DBs */
 	if ((ret = __rep_walk_dir(env, NULL, NULL, &context)) != 0)
@@ -1240,14 +2270,11 @@ __rep_remove_all(env, msg_version, rec)
 	 * 1. Get list of databases currently present at this client, which we
 	 *    intend to remove.
 	 */
-	if ((ret = __os_calloc(env, 1, MEGABYTE, &context.buf)) != 0)
-		return (ret);
-	context.size = MEGABYTE;
-	context.count = 0;
-	context.version = DB_REPVERSION;
 
 	/* Reserve space for the marshaled update_args. */
-	context.fillptr = FIRST_FILE_PTR(context.buf);
+	if ((ret = __rep_init_file_list_context(env,
+	    DB_REPVERSION, 0, 1, &context)) != 0)
+		return (ret);
 
 	if ((ret = __rep_find_dbs(env, &context)) != 0)
 		goto out;
@@ -1333,6 +2360,9 @@ __rep_remove_all(env, msg_version, rec)
 	    FIRST_FILE_PTR(context.buf), context.size,
 	    context.count, __rep_remove_file, NULL)) != 0)
 		goto out;
+	/* Remove the blob directory. */
+	if ((ret = __blob_del_hierarchy(env)) != 0)
+		goto out;
 
 	/*
 	 * 4. Safe-store the (new) list of database files we intend to copy from
@@ -1445,6 +2475,8 @@ __rep_remove_file(env, rfp, unused)
 #ifdef HAVE_QUEUE
 	DB_THREAD_INFO *ip;
 #endif
+	APPNAME appname;
+	db_seq_t blob_fid, blob_sid;
 	char *name;
 	int ret, t_ret;
 
@@ -1496,29 +2528,53 @@ __rep_remove_file(env, rfp, unused)
 	 * That will only have removed extent files.  Now
 	 * we need to deal with the actual file itself.
 	 */
+	appname = __rep_is_internal_rep_file(rfp->info.data) ?
+	    DB_APP_META : (IS_BLOB_META(rfp->info.data) ?
+	    DB_APP_BLOB : DB_APP_DATA);
 	if (FLD_ISSET(rfp->db_flags, DB_AM_INMEM)) {
 		if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
 			return (ret);
 		MAKE_INMEM(dbp);
 		F_SET(dbp, DB_AM_RECOVER); /* Skirt locking. */
 		ret = __db_inmem_remove(dbp, NULL, name);
-	} else if ((ret = __fop_remove(env,
-		    NULL, rfp->uid.data, name, (const char **)&rfp->dir.data,
-		    __rep_is_internal_rep_file(rfp->info.data) ?
-		    DB_APP_META : DB_APP_DATA, 0)) != 0)
+	} else if ((ret = __fop_remove(env, NULL, rfp->uid.data, name,
+	    (const char **)&rfp->dir.data, appname, 0)) != 0) {
 			/*
 			 * If fop_remove fails, it could be because
 			 * the client has a different data_dir
 			 * structure than the master.  Retry with the
-			 * local, default settings. 
+			 * local, default settings.
 			 */
 			ret = __fop_remove(env,
-			    NULL, rfp->uid.data, name, NULL,
-			    __rep_is_internal_rep_file(rfp->info.data) ?
-			    DB_APP_META : DB_APP_DATA, 0);
-#ifdef HAVE_QUEUE
-out:
+			    NULL, rfp->uid.data, name, NULL, appname, 0);
+#ifdef DB_WIN32
+			/*
+			 * Deleting a blob meta database can result in a
+			 * ERROR_PATH_NOT_FOUND error on windows, so treat
+			 * that as an ENOENT.
+			 */
+			if (__os_posix_err(ret) == ENOENT)
+				ret = ENOENT;
 #endif
+	}
+	    /* Clean any blob directories. */
+	if (ret == 0 && appname == DB_APP_BLOB) {
+		/* dbp has not been set, since queues do not support blobs. */
+		DB_ASSERT(env, dbp == NULL);
+		if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
+			goto out;
+		if ((ret = __blob_path_to_dir_ids(
+		    env, name, &blob_fid, &blob_sid)) != 0)
+			goto out;
+		/* blob_fid == 0 if it is the top level blob meta db. */
+		if (blob_fid != 0) {
+			dbp->blob_file_id = blob_fid;
+			dbp->blob_sdb_id = blob_sid;
+			if ((ret = __blob_del_all(dbp, NULL, 0)) != 0)
+				goto out;
+		}
+	}
+out:
 	if (dbp != NULL &&
 	    (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
 		ret = t_ret;
@@ -1610,10 +2666,11 @@ __rep_page(env, ip, eid, rp, rec)
 {
 
 	DB_REP *db_rep;
-	DBT key, data;
+	DBT data, key;
 	REP *rep;
 	__rep_fileinfo_args *msgfp, msgf;
 	__rep_fileinfo_v6_args *msgfpv6;
+	__rep_fileinfo_v7_args *msgfpv7;
 	db_recno_t recno;
 	int ret;
 	char *msg;
@@ -1647,21 +2704,30 @@ __rep_page(env, ip, eid, rp, rec)
 		    (u_long)rep->first_lsn.offset));
 		return (DB_REP_PAGEDONE);
 	}
+	/*
+	 * Build a current struct by copying in the older
+	 * version struct and then setting up the new fields.
+	 * This is safe because all old fields are in the
+	 * same location in the current struct.
+	 */
 	if (rp->rep_version < DB_REPVERSION_53) {
-		/*
-		 * Build a current struct by copying in the older
-		 * version struct and then setting up the data_dir.
-		 * This is safe because all old fields are in the
-		 * same location in the current struct.
-		 */
 		if ((ret = __rep_fileinfo_v6_unmarshal(env, rp->rep_version,
 		    &msgfpv6, rec->data, rec->size, NULL)) != 0)
 			return (ret);
 		memcpy(&msgf, msgfpv6, sizeof(__rep_fileinfo_v6_args));
 		msgf.dir.data = NULL;
 		msgf.dir.size = 0;
+		msgf.blob_fid_lo = msgf.blob_fid_hi = 0;
 		msgfp = &msgf;
 		msgfree = msgfpv6;
+	} else if (rp->rep_version < DB_REPVERSION_61) {
+		if ((ret = __rep_fileinfo_v7_unmarshal(env, rp->rep_version,
+		    &msgfpv7, rec->data, rec->size, NULL)) != 0)
+			return (ret);
+		memcpy(&msgf, msgfpv7, sizeof(__rep_fileinfo_v7_args));
+		msgf.blob_fid_lo = msgf.blob_fid_hi = 0;
+		msgfp = &msgf;
+		msgfree = msgfpv7;
 	} else {
 		if ((ret = __rep_fileinfo_unmarshal(env, rp->rep_version,
 		    &msgfp, rec->data, rec->size, NULL)) != 0)
@@ -1671,9 +2737,9 @@ __rep_page(env, ip, eid, rp, rec)
 	MUTEX_LOCK(env, rep->mtx_clientdb);
 	REP_SYSTEM_LOCK(env);
 	/*
-	 * Check if the world changed.
+	 * Check if the world changed or if we are in the blob sync phase.
 	 */
-	if (rep->sync_state != SYNC_PAGE) {
+	if (rep->sync_state != SYNC_PAGE || rep->blob_sync != 0) {
 		ret = DB_REP_PAGEDONE;
 		goto err;
 	}
@@ -1785,6 +2851,218 @@ err:	REP_SYSTEM_UNLOCK(env);
 }
 
 /*
+ * __rep_blob_chunk
+ *	Process a blob chunk message: delete the chunk's entry in the blob
+ *	chunk gap database to show that it has arrived, and write the data to
+ *	the blob file.  Returns DB_REP_PAGEDONE for stale chunks.
+ *
+ * PUBLIC: int __rep_blob_chunk __P((ENV *, int, DB_THREAD_INFO *, DBT *));
+ */
+int
+__rep_blob_chunk(env, eid, ip, rec)
+	ENV *env;
+	int eid;
+	DB_THREAD_INFO *ip;
+	DBT *rec;
+{
+	DB_REP *db_rep;
+	DBC *dbc;
+	DB_FH *fhp;
+	DBT data, key;
+	REP *rep;
+	REGINFO *infop;
+	__rep_blob_chunk_args rbc;
+	__rep_fileinfo_args *rfp;
+	db_seq_t blob_fid;
+	char *blob_sub_dir, *last, *mkpath, *name, *path;
+	int ret;
+	off_t offset;
+	u_int8_t keybuf[BLOB_KEY_SIZE], *ptr;
+
+	ret = 0;
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	infop = env->reginfo;
+	dbc = NULL;
+	blob_sub_dir = name = NULL;
+	path = NULL;
+	fhp = NULL;
+
+	if (rep->sync_state != SYNC_PAGE)
+		return (DB_REP_PAGEDONE);
+
+	if ((ret = __rep_blob_chunk_unmarshal(
+	    env, &rbc, rec->data, rec->size, &ptr)) != 0)
+		return (ret);
+
+	MUTEX_LOCK(env, rep->mtx_clientdb);
+	REP_SYSTEM_LOCK(env);
+	/*
+	 * Check if the world changed.
+	 */
+	if (rep->sync_state != SYNC_PAGE) {
+		ret = DB_REP_PAGEDONE;
+		goto err;
+	}
+	/*
+	 * We should not ever be in internal init with a lease granted.
+	 */
+	DB_ASSERT(env,
+	    !IS_USING_LEASES(env) || __rep_islease_granted(env) == 0);
+
+	/* Make sure this is for the current file. */
+	GET_CURINFO(rep, infop, rfp);
+	GET_LO_HI(env, rfp->blob_fid_lo, rfp->blob_fid_hi, blob_fid, ret);
+	if (ret != 0)
+		goto err;
+
+	if (blob_fid != (db_seq_t)rbc.blob_fid) {
+		ret = DB_REP_PAGEDONE;
+		goto err;
+	}
+
+	RPRINT(env, (env, DB_VERB_REP_SYNC,
+"REP_BLOB_CHUNK: blob_fid %llu, blob_sid %llu, blob_id %llu, offset %llu",
+	    (unsigned long long)rbc.blob_fid,
+	    (unsigned long long)rbc.blob_sid,
+	    (unsigned long long)rbc.blob_id, (unsigned long long)rbc.offset));
+
+	if (db_rep->blob_dbp == NULL &&
+	    (ret = __rep_client_dbinit(env, 0, REP_BLOB)) != 0) {
+		RPRINT(env, (env, DB_VERB_REP_SYNC,
+		    "REP_BLOB_CHUNK: Client_dbinit %s",
+		    db_strerror(ret)));
+		goto err;
+	}
+
+	/* Set the highest blob chunk received. */
+	if (rbc.blob_sid > (u_int64_t)rep->gap_bl_hi_sid ||
+	    (rbc.blob_sid == (u_int64_t)rep->gap_bl_hi_sid &&
+	    rbc.blob_id > (u_int64_t)rep->gap_bl_hi_id) ||
+	    (rbc.blob_sid == (u_int64_t)rep->gap_bl_hi_sid &&
+	    rbc.blob_id == (u_int64_t)rep->gap_bl_hi_id &&
+	    rbc.offset > (u_int64_t)rep->gap_bl_hi_off)) {
+		rep->gap_bl_hi_id = (db_seq_t)rbc.blob_id;
+		rep->gap_bl_hi_sid = (db_seq_t)rbc.blob_sid;
+		rep->gap_bl_hi_off = (off_t)rbc.offset;
+	}
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	data.flags = key.flags = DB_DBT_USERMEM;
+	key.data = keybuf;
+	key.ulen = key.size = BLOB_KEY_SIZE;
+	data.data = (void *)&offset;
+	data.ulen = data.size = sizeof(offset);
+	/* BLOB_DELETE is set if the blob file was deleted. */
+	if (F_ISSET(&rbc, BLOB_DELETE)) {
+		memcpy(keybuf, &rbc.blob_sid, BLOB_ID_SIZE);
+		memcpy(&(keybuf[BLOB_ID_SIZE]), &rbc.blob_id, BLOB_ID_SIZE);
+		if ((ret = __db_del(
+		    db_rep->blob_dbp, ip, NULL, &key, 0)) != 0) {
+			if (ret == DB_NOTFOUND)
+				ret = 0;
+			goto err;
+		}
+		goto done;
+	}
+
+	if ((ret = __db_cursor(db_rep->blob_dbp, ip, NULL, &dbc, 0)) != 0)
+		goto err;
+	offset = (off_t)rbc.offset;
+	memcpy(keybuf, &rbc.blob_sid, BLOB_ID_SIZE);
+	memcpy(&(keybuf[BLOB_ID_SIZE]), &rbc.blob_id, BLOB_ID_SIZE);
+	/* If not found we have already dealt with this chunk. */
+	if ((ret = __dbc_get(dbc, &key, &data, DB_GET_BOTH)) != 0) {
+		if (ret == DB_NOTFOUND) {
+			ret = 0;
+			goto done;
+		}
+		goto err;
+	}
+	/*
+	 * BLOB_CHUNK_FAIL is set if the blob file was truncated to shorter
+	 * than the BLOB_CHUNK offset; drop this entry and later duplicates.
+	 */
+	if (F_ISSET(&rbc, BLOB_CHUNK_FAIL)) {
+		while (ret == 0) {
+			if ((ret = __dbc_del(dbc, 0)) != 0)
+				goto err;
+			ret = __dbc_get(dbc, &key, &data, DB_NEXT_DUP);
+		}
+		/* Only running out of duplicates ends the scan cleanly. */
+		if (ret != DB_NOTFOUND)
+			goto err;
+	} else if ((ret = __dbc_del(dbc, 0)) != 0)
+		goto err;
+	/* Clear dbc before testing ret so err cannot close it twice. */
+	ret = __dbc_close(dbc);
+	dbc = NULL;
+	if (ret != 0)
+		goto err;
+	if (F_ISSET(&rbc, BLOB_CHUNK_FAIL))
+		goto done;
+
+	if ((ret = __blob_make_sub_dir(env, &blob_sub_dir,
+	    (db_seq_t)rbc.blob_fid, (db_seq_t)rbc.blob_sid)) != 0)
+		goto err;
+
+	if ((ret = __blob_id_to_path(
+	    env, blob_sub_dir, (db_seq_t)rbc.blob_id, &name)) != 0)
+		goto err;
+
+	if ((ret = __db_appname(env, DB_APP_BLOB, name, NULL, &path)) != 0 )
+		goto err;
+
+	last = __db_rpath(path);
+	DB_ASSERT(env, last != NULL);
+	*last = '\0';
+	if (__os_exists(env, path, NULL) != 0) {
+		*last = PATH_SEPARATOR[0];
+		mkpath = path;
+#ifdef DB_WIN32
+		/*
+		 * Absolute paths on windows can result in it creating a "C"
+		 *  or "D" directory in the working directory.
+		 */
+		if (__os_abspath(mkpath))
+			mkpath += 2;
+#endif
+		if ((ret = __db_mkpath(env, mkpath)) != 0)
+			goto err;
+	}
+	*last = PATH_SEPARATOR[0];
+	if ((ret = __os_open(
+	    env, path, 0, DB_OSO_CREATE, env->db_mode, &fhp)) != 0)
+		goto err;
+
+	/* Write the data into the blob file. */
+	if ((ret = __fop_write_file(env, NULL, name, NULL, DB_APP_BLOB,
+	    fhp, (off_t)rbc.offset, rbc.data.data, rbc.data.size, 0)) != 0)
+		goto err;
+	if ((ret = __os_closehandle(env, fhp)) != 0)
+		goto err;
+	fhp = NULL;
+
+done:	ret = __rep_blobdone(env, eid, ip, rep, blob_fid, 0);
+
+err:	REP_SYSTEM_UNLOCK(env);
+	MUTEX_UNLOCK(env, rep->mtx_clientdb);
+	if (path != NULL)
+		__os_free(env, path);
+	if (blob_sub_dir != NULL)
+		__os_free(env, blob_sub_dir);
+	if (name != NULL)
+		__os_free(env, name);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	if (dbc != NULL)
+		(void)__dbc_close(dbc);
+
+	return (ret);
+}
+
+/*
  * __rep_write_page -
  *	Write this page into a database.
  */
@@ -1801,13 +3079,16 @@ __rep_write_page(env, ip, rep, msgfp)
 	DB_PGINFO *pginfo;
 	DB_REP *db_rep;
 	REGINFO *infop;
+	APPNAME appname;
 	__rep_fileinfo_args *rfp;
+	char *blob_path;
 	int ret;
 	void *dst;
 
 	db_rep = env->rep_handle;
 	infop = env->reginfo;
 	rfp = NULL;
+	blob_path = NULL;
 
 	/*
 	 * If this is the first page we're putting in this database, we need
@@ -1830,15 +3111,39 @@ __rep_write_page(env, ip, rep, msgfp)
 			RPRINT(env, (env, DB_VERB_REP_SYNC,
 			    "rep_write_page: Calling fop_create for %s",
 			    (char *)rfp->info.data));
+			appname = (__rep_is_internal_rep_file(rfp->info.data) ?
+			    DB_APP_META : (IS_BLOB_META((char *)rfp->info.data)
+			    ? DB_APP_BLOB : DB_APP_DATA));
+			/*
+			 * May have to create the directory structure for blob
+			 * metadata databases.
+			 */
+			if (appname == DB_APP_BLOB) {
+				if ((ret = __db_appname(env,
+				     appname, rfp->info.data,
+				    (const char **)&rfp->dir.data,
+				    &blob_path)) != 0)
+					goto err;
+#ifdef DB_WIN32
+				/*
+				 * Absolute paths on windows can result in
+				 * it creating a "C" or "D"
+				 * directory in the working directory.
+				 */
+				if (__os_abspath(blob_path))
+					blob_path += 2;
+#endif
+				if ((ret = __db_mkpath(env, blob_path)) != 0)
+					goto err;
+			}
 			if ((ret = __fop_create(env, NULL, NULL,
 			    rfp->info.data, (const char **)&rfp->dir.data,
-			    __rep_is_internal_rep_file(rfp->info.data) ?
-			    DB_APP_META : DB_APP_DATA, env->db_mode, 0)) != 0) {
+			    appname, env->db_mode, 0)) != 0) {
 				/*
 				 * If fop_create fails, it could be because
 				 * the client has a different data_dir
 				 * structure than the master.  Retry with the
-				 * local, default settings. 
+				 * local, default settings.
 				 */
 				RPRINT(env, (env, DB_VERB_REP_SYNC,
     "rep_write_page: fop_create ret %d.  Retry for %s, master datadir %s",
@@ -1929,7 +3234,10 @@ __rep_write_page(env, ip, rep, msgfp)
 		ret = __memp_fput(db_rep->file_mpf,
 		    ip, dst, db_rep->file_dbp->priority);
 
-err:	return (ret);
+err:	if (blob_path != NULL)
+		__os_free(env, blob_path);
+
+	return (ret);
 }
 
 /*
@@ -1976,7 +3284,7 @@ __rep_page_gap(env, rep, msgfp, type)
 	 * Make sure we're still talking about the same file.
 	 * If not, we're done here.
 	 */
-	if (rfp->filenum != msgfp->filenum) {
+	if (rfp->filenum != msgfp->filenum || rep->blob_sync != 0) {
 		ret = DB_REP_PAGEDONE;
 		goto err;
 	}
@@ -2135,6 +3443,53 @@ err:
 }
 
 /*
+ * __rep_blob_cleanup -
+ *	Discard the blob state left over from an internal init.
+ *
+ *	The client database mutex (mtx_clientdb) and REP_SYSTEM_LOCK must
+ *	both be held by the caller.
+ */
+static int
+__rep_blob_cleanup(env, rep)
+	ENV *env;
+	REP *rep;
+{
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
+	int close_ret, ret;
+	u_int32_t ntruncated;
+
+	db_rep = env->rep_handle;
+	ret = 0;
+
+	/*
+	 * The blob chunk database describes the blob chunks that have yet
+	 * to arrive.  Empty it and close it: records left behind could
+	 * interfere with how the next REP_BLOB_UPDATE message is handled.
+	 */
+	if (db_rep->blob_dbp != NULL) {
+		ENV_GET_THREAD_INFO(env, ip);
+		ret = __db_truncate(db_rep->blob_dbp, ip, NULL, &ntruncated);
+		close_ret = __db_close(db_rep->blob_dbp, NULL, DB_NOSYNC);
+		db_rep->blob_dbp = NULL;
+		if (ret == 0)
+			ret = close_ret;
+	}
+
+	/* Zero every blob internal init control value. */
+	rep->blob_sync = 0;
+	rep->blob_rereq = 0;
+	rep->blob_more_files = 0;
+	rep->highest_id = 0;
+	rep->gap_bl_hi_off = 0;
+	rep->gap_bl_hi_id = rep->gap_bl_hi_sid = 0;
+	rep->last_blob_id = rep->last_blob_sid = 0;
+	rep->prev_blob_id = rep->prev_blob_sid = 0;
+
+	return (ret);
+}
+
+/*
  * __rep_init_cleanup -
  *	Clean up internal initialization pieces.
  *
@@ -2162,9 +3517,10 @@ __rep_init_cleanup(env, rep, force)
 	/*
 	 * 1.  Close up the file data pointer we used.
 	 * 2.  Close/reset the page database.
-	 * 3.  Close/reset the queue database if we're forcing a cleanup.
-	 * 4.  Free current file info.
-	 * 5.  If we have all files or need to force, free original file info.
+	 * 3.  Close/truncate the blob chunk gap database.
+	 * 4.  Close/reset the queue database if we're forcing a cleanup.
+	 * 5.  Free current file info.
+	 * 6.  If we have all files or need to force, free original file info.
 	 */
 	if (db_rep->file_mpf != NULL) {
 		ret = __memp_fclose(db_rep->file_mpf, 0);
@@ -2176,6 +3532,15 @@ __rep_init_cleanup(env, rep, force)
 		if (ret == 0)
 			ret = t_ret;
 	}
+	/*
+	 * Truncate the blob chunk gap database, since entries in the database
+	 * are for blob chunks we are expecting to arrive.  Also reset blob
+	 * internal init control values.
+	 */
+	t_ret = __rep_blob_cleanup(env, rep);
+	if (ret == 0)
+		ret = t_ret;
+
 	if (force && db_rep->queue_dbc != NULL) {
 		queue_dbp = db_rep->queue_dbc->dbp;
 		if ((t_ret = __dbc_close(db_rep->queue_dbc)) != 0 && ret == 0)
@@ -2324,8 +3689,8 @@ __rep_clean_interrupted(env)
  * __rep_filedone -
  *	We need to check if we're done with the current file after
  *	processing the current page.  Stat the database to see if
- *	we have all the pages.  If so, we need to clean up/close
- *	this one, set up for the next one, and ask for its pages,
+ *	we have all the pages and blobs.  If so, we need to clean up/close
+ *	this one, set up for the next one, and ask for its pages and blobs,
  *	or if this is the last file, request the log records and
  *	move to the REP_RECOVER_LOG state.
  */
@@ -2338,9 +3703,14 @@ __rep_filedone(env, ip, eid, rep, msgfp, type)
 	__rep_fileinfo_args *msgfp;
 	u_int32_t type;
 {
+	DBT msg;
 	REGINFO *infop;
 	__rep_fileinfo_args *rfp;
+	__rep_blob_update_req_args rbur;
 	int ret;
+	u_int8_t buf[__REP_BLOB_UPDATE_REQ_SIZE];
+
+	memset(&msg, 0, sizeof(DBT));
 
 	/*
 	 * We've put our page, now we need to do any gap processing
@@ -2375,8 +3745,96 @@ __rep_filedone(env, ip, eid, rep, msgfp, type)
 	    ((ret = __rep_queue_filedone(env, ip, rep, rfp)) !=
 	    DB_REP_PAGEDONE))
 		return (ret);
+
+	/* Request blob files. */
+	if (rfp->blob_fid_lo != 0 || rfp->blob_fid_hi != 0) {
+		ret = 0;
+		rep->blob_sync = 1;
+		memset(&rbur, 0, sizeof(__rep_blob_update_req_args));
+		GET_LO_HI(env,
+		    rfp->blob_fid_lo, rfp->blob_fid_hi, rbur.blob_fid, ret);
+		msg.size = __REP_BLOB_UPDATE_REQ_SIZE;
+		msg.data = buf;
+		__rep_blob_update_req_marshal(env, &rbur, msg.data);
+		(void)__rep_send_message(env,
+		    rep->master_id, REP_BLOB_UPDATE_REQ, NULL, &msg, 0, 0);
+		return (ret);
+	}
+
+	/*
+	 * We have all the data for this file.  Clean up.
+	 */
+	if ((ret = __rep_init_cleanup(env, rep, 0)) != 0)
+		return (ret);
+
+	rep->curfile++;
+	ret = __rep_nextfile(env, eid, rep);
+
+	return (ret);
+}
+
+/*
+ * __rep_blobdone -
+ *	We need to check if we're done with the current file after
+ *	processing the current blob chunk.
+ *
+ *	Caller must hold client database mutex (mtx_clientdb) and
+ *	REP_SYSTEM_LOCK.
+ */
+static int
+__rep_blobdone(env, eid, ip, rep, blob_fid, force)
+	ENV *env;
+	int eid;
+	DB_THREAD_INFO *ip;
+	REP *rep;
+	db_seq_t blob_fid;
+	int force;
+{
+	DBT msg;
+	__rep_blob_update_req_args rbur;
+	int done, ret;
+	u_int8_t buf[__REP_BLOB_UPDATE_REQ_SIZE];
+
 	/*
-	 * We have all the pages for this file.  Clean up.
+	 * We've written our blob chunk, now we need to do any gap processing
+	 * that might be needed to re-request chunks.
+	 */
+	done = 0;
+	ret = __rep_blob_chunk_gap(env, eid, ip, rep, &done, blob_fid, force);
+	/*
+	 * The world changed while we were doing gap processing.
+	 * We're done here.
+	 */
+	if (ret == DB_REP_PAGEDONE)
+		return (0);
+	else if (ret != 0)
+		goto err;
+
+	/*
+	 * If the blob database is empty then all files in the current list
+	 * have been processed.  However, there may be more files on the
+	 * master, so request the next list if that is the case.
+	 */
+	if (done && rep->blob_more_files) {
+		memset(&rbur, 0, sizeof(__rep_blob_update_req_args));
+		rbur.blob_fid = (u_int64_t)blob_fid;
+		rbur.blob_sid = (u_int64_t)rep->last_blob_sid;
+		rbur.blob_id = (u_int64_t)rep->last_blob_id;
+		rbur.highest_id = (u_int64_t)rep->highest_id;
+		rep->gap_bl_hi_id = rep->gap_bl_hi_sid = 0;
+		rep->gap_bl_hi_off = 0;
+		rep->blob_rereq = 0;
+		msg.size = __REP_BLOB_UPDATE_REQ_SIZE;
+		msg.data = buf;
+		__rep_blob_update_req_marshal(env, &rbur, msg.data);
+		(void)__rep_send_message(env,
+		    rep->master_id, REP_BLOB_UPDATE_REQ, NULL, &msg, 0, 0);
+		return (0);
+	} else if (!done)
+		return (0);
+
+	/*
+	 * We have all the data for this file.  Clean up.
 	 */
 	if ((ret = __rep_init_cleanup(env, rep, 0)) != 0)
 		goto err;
@@ -2388,6 +3846,255 @@ err:
 }
 
 /*
+ * __rep_blob_chunk_gap -
+ *	Called after a blob chunk has been written.  Walks the blob chunk
+ *	gap database, which describes the chunks that have yet to arrive,
+ *	and re-requests those that look lost.  Sets *done to 1 when that
+ *	database is empty.  Returns 0, DB_REP_PAGEDONE when blob_fid is no
+ *	longer the current file, or an error.  Caller must hold the client
+ *	database mutex (mtx_clientdb) and REP_SYSTEM_LOCK.
+ */
+static int
+__rep_blob_chunk_gap(env, eid, ip, rep, done, blob_fid, force)
+	ENV *env;
+	int eid;
+	DB_THREAD_INFO *ip;
+	REP *rep;
+	int *done;
+	db_seq_t blob_fid;
+	int force;
+{
+	DBC *dbc;
+	DBT data, high, key, msg;
+	DB_LOG *dblp;
+	DB_REP *db_rep;
+	LOG *lp;
+	REGINFO *infop;
+	__rep_blob_chunk_req_args rbcr;
+	__rep_fileinfo_args *rfp;
+	db_seq_t cur_blob_fid;
+	off_t offset;
+	int cmp, ret;
+	u_int8_t buf[BLOB_KEY_SIZE], msgbuf[__REP_BLOB_CHUNK_REQ_SIZE];
+
+	db_rep = env->rep_handle;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	infop = env->reginfo;
+	ret = 0;
+	dbc = NULL;
+	*done = 0;
+
+	/* eid will be used when peer-to-peer is re-enabled for blobs. */
+	COMPQUIET(eid, 0);
+
+	/*
+	 * Make sure we're still talking about the same file.  If not, we're
+	 * done here.  GET_LO_HI may set ret; check it before ret is reused.
+	 */
+	GET_CURINFO(rep, infop, rfp);
+	GET_LO_HI(env, rfp->blob_fid_lo, rfp->blob_fid_hi, cur_blob_fid, ret);
+	if (ret != 0)
+		goto err;
+	if (cur_blob_fid != blob_fid) {
+		ret = DB_REP_PAGEDONE;
+		goto err;
+	}
+
+	/* Get the first missing blob chunk. */
+	if ((ret = __db_cursor(db_rep->blob_dbp, ip, NULL, &dbc, 0)) != 0)
+		goto err;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	memset(&msg, 0, sizeof(DBT));
+	if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != 0) {
+		/* DB_NOTFOUND here means all blobs were received. */
+		if (ret == DB_NOTFOUND) {
+			ret = 0;
+			*done = 1;
+		}
+		goto err;
+	}
+
+	DB_ASSERT(env, key.size == BLOB_KEY_SIZE);
+	DB_ASSERT(env, data.size == sizeof(off_t));
+	memcpy(&offset, data.data, sizeof(off_t));
+	/*
+	 * Format the sdbid and id of the high chunk as a blob gap
+	 * database key, so it can be compared with the entries in that
+	 * database.
+	 */
+	memset(&high, 0, sizeof(DBT));
+	memcpy(buf, &rep->gap_bl_hi_sid, BLOB_ID_SIZE);
+	memcpy(buf + BLOB_ID_SIZE, &rep->gap_bl_hi_id, BLOB_ID_SIZE);
+	high.data = buf;
+	high.size = BLOB_KEY_SIZE;
+
+	/*
+	 * If the first chunk in the database is larger than the highest chunk
+	 * received, then there is no gap.
+	 *
+	 * If a gap does exist, check if it is time to do a re-request.  If so,
+	 * re-request every chunk that exists before the highest received.
+	 */
+	cmp = __rep_blob_cmp(NULL, &key, &high, NULL);
+	if (!force &&
+	    (cmp > 0 || (cmp == 0 && offset > rep->gap_bl_hi_off))) {
+		lp->wait_ts = db_rep->request_gap;
+		__os_gettime(env, &lp->rcvd_ts, 1);
+	} else if (force || __rep_check_doreq(env, rep)) {
+		/*
+		 * Re-request every chunk less than the highest one, plus the
+		 * next blob chunk that we are expecting.  The next expected
+		 * blob chunk is requested in case the last blob chunk is lost
+		 * in transit.
+		 */
+		do {
+			memset(&rbcr, 0, sizeof(__rep_blob_chunk_req_args));
+			memcpy(&(rbcr.blob_sid), key.data, BLOB_ID_SIZE);
+			memcpy(&(rbcr.blob_id),
+			    (u_int8_t *)key.data + BLOB_ID_SIZE, BLOB_ID_SIZE);
+			rbcr.offset = (u_int64_t)offset;
+			rbcr.blob_fid = (u_int64_t)blob_fid;
+			msg.size = __REP_BLOB_CHUNK_REQ_SIZE;
+			msg.data = msgbuf;
+			RPRINT(env, (env, DB_VERB_REP_SYNC,
+"blob_chunk_gap: Req file_id %llu, sdb_id %llu, blob_id %llu, offset %llu",
+			    (unsigned long long)rbcr.blob_fid,
+			    (unsigned long long)rbcr.blob_sid,
+			    (unsigned long long)rbcr.blob_id,
+			    (unsigned long long)rbcr.offset));
+			__rep_blob_chunk_req_marshal(env, &rbcr, msg.data);
+			/* Peer-to-peer init is not supported for blobs. */
+			(void)__rep_send_message(
+			    env, rep->master_id,
+			    REP_BLOB_CHUNK_REQ, NULL, &msg, 0, 0);
+			/* Stop after the chunk past the highest one. */
+			if (cmp > 0 ||
+			    (cmp == 0 && offset > rep->gap_bl_hi_off))
+				break;
+			if ((ret = __dbc_get(
+			    dbc, &key, &data, DB_NEXT)) != 0) {
+				if (ret == DB_NOTFOUND)
+					ret = 0;
+				goto err;
+			}
+			/* Re-read what the break test depends on. */
+			DB_ASSERT(env, data.size == sizeof(off_t));
+			memcpy(&offset, data.data, sizeof(off_t));
+			cmp = __rep_blob_cmp(NULL, &key, &high, NULL);
+		} while (1);
+	}
+
+err:	if (dbc != NULL)
+		(void)__dbc_close(dbc);
+
+	return (ret);
+}
+
+/*
+ * __rep_blob_chunk_req
+ *	Answer a request for a specific blob chunk: read up to MEGABYTE
+ *	bytes of the blob file at the requested offset and send them to
+ *	eid in a REP_BLOB_CHUNK message.  Returns 0 or an error.
+ *
+ * PUBLIC: int __rep_blob_chunk_req __P((ENV *, int, DBT *));
+ */
+int
+__rep_blob_chunk_req(env, eid, rec)
+	ENV *env;
+	int eid;
+	DBT *rec;
+{
+	DB *dbp;
+	DBT msg;
+	DB_FH *fhp;
+	__rep_blob_chunk_args rbc;
+	__rep_blob_chunk_req_args rbcr;
+	int ret;
+	u_int8_t *chunk_buf, *msg_buf, *ptr;
+
+	dbp = NULL;
+	fhp = NULL;
+	chunk_buf = msg_buf = NULL;
+
+	/* msg_buf holds the marshaled reply, chunk_buf the raw file data. */
+	if ((ret =
+	    __os_malloc(env, MEGABYTE + __REP_BLOB_CHUNK_SIZE, &msg_buf)) != 0)
+		goto err;
+	memset(&msg, 0, sizeof(DBT));
+	msg.data = msg_buf;
+	msg.ulen = MEGABYTE + __REP_BLOB_CHUNK_SIZE;
+	if ((ret = __os_malloc(env, MEGABYTE, &chunk_buf)) != 0)
+		goto err;
+	memset(&rbc, 0, sizeof(__rep_blob_chunk_args));
+	rbc.data.data = chunk_buf;
+	rbc.data.ulen = MEGABYTE;
+	rbc.data.flags = DB_DBT_USERMEM;
+
+	if ((ret = __rep_blob_chunk_req_unmarshal(
+	    env, &rbcr, rec->data, rec->size, &ptr)) != 0)
+		goto err;
+
+	RPRINT(env, (env, DB_VERB_REP_SYNC,
+	    "blob_chunk_req: file_id %llu, sdbid %llu, id %llu, offset %llu",
+	    (unsigned long long)rbcr.blob_fid,
+	    (unsigned long long)rbcr.blob_sid,
+	    (unsigned long long)rbcr.blob_id,
+	    (unsigned long long)rbcr.offset));
+
+	rbc.blob_fid = rbcr.blob_fid;
+	rbc.blob_id = rbcr.blob_id;
+	rbc.blob_sid = rbcr.blob_sid;
+	rbc.offset = rbcr.offset;
+	if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
+		goto err;
+	dbp->blob_file_id = (db_seq_t)rbcr.blob_fid;
+	dbp->blob_sdb_id = (db_seq_t)rbcr.blob_sid;
+	if ((ret = __blob_make_sub_dir(env, &dbp->blob_sub_dir,
+	    (db_seq_t)rbcr.blob_fid, (db_seq_t)rbcr.blob_sid)) != 0)
+		goto err;
+	if ((ret = __blob_file_open(
+	    dbp, &fhp, (db_seq_t)rbcr.blob_id, DB_FOP_READONLY, 0)) != 0) {
+		/*
+		 * The file may have been deleted between creating the list
+		 * and sending the request; report that instead of failing.
+		 */
+		if (ret == ENOENT) {
+			ret = 0;
+			F_SET(&rbc, BLOB_DELETE);
+			rbc.data.size = 0;
+			__rep_blob_chunk_marshal(env, &rbc, msg.data);
+			msg.size = __REP_BLOB_CHUNK_SIZE;
+			(void)__rep_send_message(
+			    env, eid, REP_BLOB_CHUNK, NULL, &msg, 0, 0);
+		}
+		goto err;
+	}
+	if ((ret = __blob_file_read(
+	    env, fhp, &rbc.data, (off_t)rbcr.offset, MEGABYTE)) != 0)
+		goto err;
+	DB_ASSERT(env, rbc.data.size <= MEGABYTE);
+
+	/* Rarely, the blob file has shrunk since the list was created. */
+	if (rbc.data.size == 0)
+		F_SET(&rbc, BLOB_CHUNK_FAIL);
+	__rep_blob_chunk_marshal(env, &rbc, msg.data);
+	msg.size = __REP_BLOB_CHUNK_SIZE + rbc.data.size;
+	(void)__rep_send_message(env, eid, REP_BLOB_CHUNK, NULL, &msg, 0, 0);
+
+err:	if (chunk_buf != NULL)
+		__os_free(env, chunk_buf);
+	if (msg_buf != NULL)
+		__os_free(env, msg_buf);
+	if (fhp != NULL)
+		(void)__os_closehandle(env, fhp);
+	if (dbp != NULL)
+		(void)__db_close(dbp, NULL, 0);
+	return (ret);
+}
+
+/*
  * Starts requesting pages for the next file in the list (if any), or if not,
  * proceeds to the next stage: requesting logs.
  *
@@ -2404,19 +4111,25 @@ __rep_nextfile(env, eid, rep)
 	DBT dbt;
 	__rep_logreq_args lr_args;
 	DB_LOG *dblp;
+	DB_REP *db_rep;
+	DELAYED_BLOB_LIST *dbl;
 	LOG *lp;
 	REGENV *renv;
 	REGINFO *infop;
 	__rep_fileinfo_args *curinfo, *rfp, rf;
 	__rep_fileinfo_v6_args *rfpv6;
-	int *curbuf, ret;
+	__rep_fileinfo_v7_args *rfpv7;
+	int *curbuf, ret, view_partial;
 	u_int8_t *buf, *info_ptr, lrbuf[__REP_LOGREQ_SIZE], *nextinfo;
 	size_t len, msgsz;
+	char *name;
 	void *rffree;
 
 	infop = env->reginfo;
 	renv = infop->primary;
+	db_rep = env->rep_handle;
 	rfp = NULL;
+	dbl = NULL;
 
 	/*
 	 * Always direct the next request to the master (at least nominally),
@@ -2430,13 +4143,13 @@ __rep_nextfile(env, eid, rep)
 		/* Set curinfo to next file and examine it. */
 		info_ptr = R_ADDR(infop,
 		    rep->originfo_off + (rep->originfolen - rep->infolen));
+		/*
+		 * Build a current struct by copying in the older
+		 * version struct and then setting up the new fields.
+		 * This is safe because all old fields are in the
+		 * same location in the current struct.
+		 */
 		if (rep->infoversion < DB_REPVERSION_53) {
-			/*
-			 * Build a current struct by copying in the older
-			 * version struct and then setting up the data_dir.
-			 * This is safe because all old fields are in the
-			 * same location in the current struct.
-			 */
 			if ((ret = __rep_fileinfo_v6_unmarshal(env,
 			    rep->infoversion, &rfpv6,
 			    info_ptr, rep->infolen, &nextinfo)) != 0)
@@ -2444,8 +4157,18 @@ __rep_nextfile(env, eid, rep)
 			memcpy(&rf, rfpv6, sizeof(__rep_fileinfo_v6_args));
 			rf.dir.data = NULL;
 			rf.dir.size = 0;
+			rf.blob_fid_lo = rf.blob_fid_hi = 0;
 			rfp = &rf;
 			rffree = rfpv6;
+		} else if (rep->infoversion < DB_REPVERSION_61) {
+			if ((ret = __rep_fileinfo_v7_unmarshal(env,
+			    rep->infoversion, &rfpv7,
+			    info_ptr, rep->infolen, &nextinfo)) != 0)
+				return (ret);
+			memcpy(&rf, rfpv7, sizeof(__rep_fileinfo_v7_args));
+			rf.blob_fid_lo = rf.blob_fid_hi = 0;
+			rfp = &rf;
+			rffree = rfpv7;
 		} else {
 			if ((ret = __rep_fileinfo_unmarshal(env,
 			    rep->infoversion, &rfp, info_ptr,
@@ -2457,6 +4180,14 @@ __rep_nextfile(env, eid, rep)
 			}
 			rffree = rfp;
 		}
+#ifndef HAVE_64BIT_TYPES
+		if (rfp->blob_fid_lo != 0 || rfp->blob_fid_hi != 0) {
+		    __db_errx(env, DB_STR("3705",
+			"Blobs require 64 integer compiler support."));
+			__os_free(env, rffree);
+			return (DB_OPNOTSUP);
+		}
+#endif
 		rep->infolen -= (u_int32_t)(nextinfo - info_ptr);
 		MUTEX_LOCK(env, renv->mtx_regenv);
 		ret = __env_alloc(infop, sizeof(__rep_fileinfo_args) +
@@ -2484,19 +4215,55 @@ __rep_nextfile(env, eid, rep)
 			    rfp->dir.data, rfp->dir.size);
 		__os_free(env, rffree);
 
-		/* Skip over regular DB's in "abbreviated" internal inits. */
-		if (F_ISSET(rep, REP_F_ABBREVIATED) &&
+		/*
+		 * If a partial callback is set, invoke the callback to see if
+		 * this file should be replicated.
+		 */
+		if (IS_VIEW_SITE(env) && curinfo->info.size > 0 &&
 		    !FLD_ISSET(curinfo->db_flags, DB_AM_INMEM)) {
+			name = (char *)curinfo->info.data;
+			DB_ASSERT(env, db_rep->partial != NULL);
+			/*
+			 * Always replicate system owned databases.
+			 */
+			if (IS_DB_FILE(name) && !IS_BLOB_META(name))
+				view_partial = 1;
+			else if ((ret = __rep_call_partial(env,
+			    name, &view_partial, 0, &dbl)) != 0) {
+				VPRINT(env, (env, DB_VERB_REP_SYNC,
+				    "rep_nextfile: partial cb err %d for %s",
+				    ret, name));
+				return (ret);
+			}
+			/*
+			 * dbl != NULL when we could not find the name of the
+			 * database that owns a blob meta database.  If that
+			 * happens then it was never opened, which means it
+			 * was not replicated, and as such neither should its
+			 * bmd be replicated.
+			 */
+			if (dbl != NULL) {
+				view_partial = 0;
+				__os_free(env, dbl);
+				dbl = NULL;
+			}
 			VPRINT(env, (env, DB_VERB_REP_SYNC,
-			    "Skipping file %d in abbreviated internal init",
-			    curinfo->filenum));
-			MUTEX_LOCK(env, renv->mtx_regenv);
-			__env_alloc_free(infop,
-			    R_ADDR(infop, rep->curinfo_off));
-			MUTEX_UNLOCK(env, renv->mtx_regenv);
-			rep->curinfo_off = INVALID_ROFF;
-			rep->curfile++;
-			continue;
+			    "rep_nextfile: %s file %s %d on view site.",
+			    view_partial == 0 ?
+			    "Skipping" : "Replicating",
+			    name, curinfo->filenum));
+			/*
+			 * If we're skipping the file, move to the next one.
+			 */
+			if (view_partial == 0) {
+				MUTEX_LOCK(env, renv->mtx_regenv);
+				__env_alloc_free(infop,
+				    R_ADDR(infop, rep->curinfo_off));
+				MUTEX_UNLOCK(env, renv->mtx_regenv);
+				rep->curinfo_off = INVALID_ROFF;
+				rep->curfile++;
+				continue;
+			}
 		}
 
 		/* Request this file's pages. */
@@ -2519,15 +4286,19 @@ __rep_nextfile(env, eid, rep)
 		    curinfo->uid.size + curinfo->info.size;
 		if ((ret = __os_calloc(env, 1, msgsz, &buf)) != 0)
 			return (ret);
+		/*
+		 * It is safe to cast to the old structs
+		 * because the first part of the current
+		 * struct matches the old structs.
+		 */
 		if (rep->infoversion < DB_REPVERSION_53)
-			/*
-			 * It is safe to cast to the old struct
-			 * because the first part of the current
-			 * struct matches the old struct.
-			 */
 			ret = __rep_fileinfo_v6_marshal(env, rep->infoversion,
 			    (__rep_fileinfo_v6_args *)curinfo, buf,
 			    msgsz, &len);
+		else if (rep->infoversion < DB_REPVERSION_61)
+			ret = __rep_fileinfo_v7_marshal(env, rep->infoversion,
+			    (__rep_fileinfo_v7_args *)curinfo, buf,
+			    msgsz, &len);
 		else
 			ret = __rep_fileinfo_marshal(env, rep->infoversion,
 			    curinfo, buf, msgsz, &len);
@@ -2834,16 +4605,19 @@ __rep_pggap_req(env, rep, reqfp, gapflags)
 		 * new info into rep->finfo.  Assert that the sizes never
 		 * change.  The only thing this should do is change
 		 * the pgno field.  Everything else remains the same.
+		 *
+		 * It is safe to cast to the old structs
+		 * because the first part of the current
+		 * struct matches the old structs.
 		 */
 		if (rep->infoversion < DB_REPVERSION_53)
-			/*
-			 * It is safe to cast to the old struct
-			 * because the first part of the current
-			 * struct matches the old struct.
-			 */
 			ret = __rep_fileinfo_v6_marshal(env, rep->infoversion,
 			    (__rep_fileinfo_v6_args *)tmpfp, buf,
 			    msgsz, &len);
+		else if (rep->infoversion < DB_REPVERSION_61)
+			ret = __rep_fileinfo_v7_marshal(env, rep->infoversion,
+			    (__rep_fileinfo_v7_args *)tmpfp, buf,
+			    msgsz, &len);
 		else
 			ret = __rep_fileinfo_marshal(env, rep->infoversion,
 			    tmpfp, buf, msgsz, &len);
@@ -2865,6 +4639,94 @@ err:
 }
 
 /*
+ * __rep_blob_rereq -
+ *
+ * Ask again for blob messages that seem to have been lost, i.e. a
+ * REP_BLOB_CHUNK_REQ, REP_BLOB_ALL_REQ or REP_BLOB_UPDATE_REQ.  The blob
+ * chunk gap database describes the blob chunks we still expect to arrive.
+ *
+ * Assumes the caller holds mtx_clientdb and rep_mutex.
+ *
+ * PUBLIC: int __rep_blob_rereq __P((ENV *, REP *));
+ */
+int
+__rep_blob_rereq(env, rep)
+	ENV *env;
+	REP *rep;
+{
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
+	REGINFO *infop;
+	__rep_fileinfo_args *rfp;
+	db_seq_t blob_fid;
+	int master_eid, ret;
+	u_int32_t ntruncated;
+
+	ret = 0;
+	rfp = NULL;
+	infop = env->reginfo;
+	db_rep = env->rep_handle;
+
+	/* A re-request is pointless without a master; go look for one. */
+	if ((master_eid = rep->master_id) == DB_EID_INVALID) {
+		(void)__rep_send_message(env,
+		    DB_EID_BROADCAST, REP_MASTER_REQ, NULL, NULL, 0, 0);
+		return (0);
+	}
+
+	/* No blob database handle yet: run client dbinit for REP_BLOB. */
+	if (db_rep->blob_dbp == NULL) {
+		ret = __rep_client_dbinit(env, 0, REP_BLOB);
+		if (ret != 0) {
+			RPRINT(env, (env, DB_VERB_REP_SYNC,
+			    "REP_BLOB_CHUNK: Client_dbinit %s",
+			    db_strerror(ret)));
+			return (ret);
+		}
+	}
+
+	/*
+	 * A zero gap blob id means the lost message was a REP_BLOB_ALL_REQ
+	 * or a REP_BLOB_UPDATE_REQ.  We lack the information to rebuild a
+	 * REP_BLOB_ALL_REQ, so empty the blob gap database and restart from
+	 * the REP_BLOB_UPDATE_REQ stage.
+	 *
+	 * A non-zero gap blob id means a REP_BLOB_CHUNK_REQ was lost, which
+	 * ordinary blob gap processing takes care of.
+	 */
+	ENV_GET_THREAD_INFO(env, ip);
+	if (rep->gap_bl_hi_id == 0) {
+		/*
+		 * Building the blob update message takes a while, so let
+		 * the first request of this kind pass without acting.
+		 */
+		if (rep->blob_rereq == 0) {
+			rep->blob_rereq = 1;
+			return (0);
+		}
+		rep->blob_rereq = 0;
+		ret = __db_truncate(db_rep->blob_dbp, ip, NULL, &ntruncated);
+		if (ret != 0)
+			return (ret);
+		rep->blob_more_files = 1;
+		rep->last_blob_sid = rep->prev_blob_sid;
+		rep->last_blob_id = rep->prev_blob_id;
+	}
+
+	GET_CURINFO(rep, infop, rfp);
+	GET_LO_HI(env, rfp->blob_fid_lo, rfp->blob_fid_hi, blob_fid, ret);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Hand off to __rep_blobdone: it performs gap processing when the
+	 * blob gap database has entries, and otherwise sends a
+	 * REP_BLOB_UPDATE_REQ.
+	 */
+	return (__rep_blobdone(env, master_eid, ip, rep, blob_fid, 1));
+}
+
+/*
  * __rep_finfo_alloc -
  *	Allocate and initialize a fileinfo structure.
  *
@@ -3521,6 +5383,7 @@ __rep_walk_filelist(env, version, files, size, count, fn, arg)
 {
 	__rep_fileinfo_args *rfp, rf;
 	__rep_fileinfo_v6_args *rfpv6;
+	__rep_fileinfo_v7_args *rfpv7;
 	u_int8_t *next;
 	int ret;
 	void *rffree;
@@ -3530,21 +5393,30 @@ __rep_walk_filelist(env, version, files, size, count, fn, arg)
 	rfpv6 = NULL;
 	rffree = NULL;
 	while (count-- > 0) {
+		/*
+		 * Build a current struct by copying in the older
+		 * version struct and then setting up the new fields.
+		 * This is safe because all old fields are in the
+		 * same location in the current struct.
+		 */
 		if (version < DB_REPVERSION_53) {
-			/*
-			 * Build a current struct by copying in the older
-			 * version struct and then setting up the data_dir.
-			 * This is safe because all old fields are in the
-			 * same location in the current struct.
-			 */
 			if ((ret = __rep_fileinfo_v6_unmarshal(env, version,
 			    &rfpv6, files, size, &next)) != 0)
 				break;
 			memcpy(&rf, rfpv6, sizeof(__rep_fileinfo_v6_args));
 			rf.dir.data = NULL;
 			rf.dir.size = 0;
+			rf.blob_fid_lo = rf.blob_fid_hi = 0;
 			rfp = &rf;
 			rffree = rfpv6;
+		} else if (version < DB_REPVERSION_61) {
+			if ((ret = __rep_fileinfo_v7_unmarshal(env, version,
+			    &rfpv7, files, size, &next)) != 0)
+				break;
+			memcpy(&rf, rfpv7, sizeof(__rep_fileinfo_v7_args));
+			rf.blob_fid_lo = rf.blob_fid_hi = 0;
+			rfp = &rf;
+			rffree = rfpv7;
 		} else {
 			if ((ret = __rep_fileinfo_unmarshal(env, version,
 			    &rfp, files, size, &next)) != 0)
@@ -3566,3 +5438,33 @@ __rep_walk_filelist(env, version, files, size, count, fn, arg)
 		__os_free(env, rffree);
 	return (ret);
 }
+
+/*
+ * __rep_init_file_list_context --
+ *	Set up a FILE_LIST_CTX around a newly allocated MEGABYTE buffer with
+ *	a file count of zero.  A non-zero update_space reserves room for
+ *	update_args at the front of the context's buffer.
+ */
+static int
+__rep_init_file_list_context(env, version, flags, update_space, context)
+	ENV *env;
+	u_int32_t version;
+	u_int32_t flags;
+	int update_space;
+	FILE_LIST_CTX *context;
+{
+	int ret;
+
+	ret = __os_calloc(env, 1, MEGABYTE, &context->buf);
+	if (ret != 0)
+		return (ret);
+	context->flags = flags;
+	context->version = version;
+	context->count = 0;
+	context->size = MEGABYTE;
+	/* Fill from the buffer start unless update_args need the front. */
+	context->fillptr = context->buf;
+	if (update_space)
+		context->fillptr = FIRST_FILE_PTR(context->buf);
+	return (0);
+}
diff --git a/src/rep/rep_elect.c b/src/rep/rep_elect.c
index 9e8c5249..234daf31 100644
--- a/src/rep/rep_elect.c
+++ b/src/rep/rep_elect.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -53,8 +53,9 @@ __rep_elect_pp(dbenv, given_nsites, nvotes, flags)
 	u_int32_t given_nsites, nvotes;
 	u_int32_t flags;
 {
-	DB_REP *db_rep;
 	ENV *env;
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
 	int ret;
 
 	env = dbenv->env;
@@ -89,7 +90,9 @@ __rep_elect_pp(dbenv, given_nsites, nvotes, flags)
 		return (EINVAL);
 	}
 
+	ENV_ENTER(env, ip);
 	ret = __rep_elect_int(env, given_nsites, nvotes, flags);
+	ENV_LEAVE(env, ip);
 
 	/*
 	 * The DB_REP_IGNORE return code can be of use to repmgr (which of
@@ -120,7 +123,6 @@ __rep_elect_int(env, given_nsites, nvotes, flags)
 	DB_LOGC *logc;
 	DB_LSN lsn;
 	DB_REP *db_rep;
-	DB_THREAD_INFO *ip;
 	LOG *lp;
 	REP *rep;
 	int done, elected, in_progress;
@@ -140,6 +142,15 @@ __rep_elect_int(env, given_nsites, nvotes, flags)
 	ret = 0;
 
 	/*
+	 * View sites never participate in elections.
+	 */
+	if (IS_VIEW_SITE(env)) {
+		__db_errx(env, DB_STR("3687",
+		    "View sites may not participate in elections"));
+		return (EINVAL);
+	}
+
+	/*
 	 * Specifying 0 for nsites signals us to use the value configured
 	 * previously via rep_set_nsites.  Similarly, if the given nvotes is 0,
 	 * it asks us to compute the value representing a simple majority.
@@ -185,7 +196,6 @@ __rep_elect_int(env, given_nsites, nvotes, flags)
 	 * real, configured priority, as retrieved from REP region.
 	 */
 	ctlflags = realpri != 0 ? REPCTL_ELECTABLE : 0;
-	ENV_ENTER(env, ip);
 
 	orig_tally = 0;
 	/* If we are already master, simply broadcast that fact and return. */
@@ -597,8 +607,7 @@ out:
 	DB_ASSERT(env, rep->elect_th > 0);
 	rep->elect_th--;
 	if (rep->elect_th == 0) {
-		need_req = F_ISSET(rep, REP_F_SKIPPED_APPLY) &&
-		    !I_HAVE_WON(rep, rep->winner);
+		need_req = F_ISSET(rep, REP_F_SKIPPED_APPLY) && !elected;
 		FLD_CLR(rep->lockout_flags, REP_LOCKOUT_APPLY);
 		F_CLR(rep, REP_F_SKIPPED_APPLY);
 	}
@@ -641,7 +650,6 @@ out:
 unlck_lv:	REP_SYSTEM_UNLOCK(env);
 	}
 envleave:
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -1106,7 +1114,7 @@ __rep_cmp_vote(env, rep, eid, lsnp, priority, gen, data_gen, tiebreaker, flags)
 	u_int32_t priority;
 	u_int32_t data_gen, flags, gen, tiebreaker;
 {
-	int cmp, like_pri;
+	int cmp, genlog_cmp, like_pri;
 
 	cmp = LOG_COMPARE(lsnp, &rep->w_lsn);
 	/*
@@ -1140,9 +1148,18 @@ __rep_cmp_vote(env, rep, eid, lsnp, priority, gen, data_gen, tiebreaker, flags)
 		like_pri = (priority == 0 && rep->w_priority == 0) ||
 		    (priority != 0 && rep->w_priority != 0);
 
-		if ((priority != 0 && rep->w_priority == 0) ||
-		    (like_pri && data_gen > rep->w_datagen) ||
-		    (like_pri && data_gen == rep->w_datagen && cmp > 0) ||
+		/*
+		 * The undocumented ELECT_LOGLENGTH option requires that the
+		 * election should be won based on log length without regard
+		 * for datagen.  Do not include datagen in the comparison if
+		 * this option is enabled.
+		 */
+		if (FLD_ISSET(rep->config, REP_C_ELECT_LOGLENGTH))
+			genlog_cmp = like_pri && cmp > 0;
+		else
+			genlog_cmp = (like_pri && data_gen > rep->w_datagen) ||
+			    (like_pri && data_gen == rep->w_datagen && cmp > 0);
+		if ((priority != 0 && rep->w_priority == 0) || genlog_cmp ||
 		    (cmp == 0 && (priority > rep->w_priority ||
 		    (priority == rep->w_priority &&
 		    (tiebreaker > rep->w_tiebreaker))))) {
@@ -1306,8 +1323,9 @@ __rep_wait(env, timeoutp, full_elect, egen, flags)
 {
 	DB_REP *db_rep;
 	REP *rep;
-	int done;
-	u_int32_t sleeptime, sleeptotal, timeout;
+	db_timespec exptime, mytime;
+	int diff_timeout, done;
+	u_int32_t sleeptime, timeout;
 
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
@@ -1315,10 +1333,20 @@ __rep_wait(env, timeoutp, full_elect, egen, flags)
 
 	timeout = *timeoutp;
 	sleeptime = SLEEPTIME(timeout);
-	sleeptotal = 0;
-	while (sleeptotal < timeout) {
+	__os_gettime(env, &exptime, 0);
+	TIMESPEC_ADD_DB_TIMEOUT(&exptime, timeout);
+	while (!done) {
+		__os_gettime(env, &mytime, 0);
+		/*
+		 * Check if the timeout has expired.  __os_yield might sleep
+		 * a slightly shorter time than requested, so check the exact
+		 * amount of time that has passed.  If we do not sleep the
+		 * full PHASE0 time, old unexpired lease grants could
+		 * incorrectly prevent the election from happening.
+		 */
+		if (timespeccmp(&mytime, &exptime, >))
+			break;
 		__os_yield(env, 0, sleeptime);
-		sleeptotal += sleeptime;
 		REP_SYSTEM_LOCK(env);
 		/*
 		 * Check if group membership changed while we were
@@ -1331,19 +1359,19 @@ __rep_wait(env, timeoutp, full_elect, egen, flags)
 		if (!LF_ISSET(REP_E_PHASE0) &&
 		    full_elect && F_ISSET(rep, REP_F_GROUP_ESTD)) {
 			*timeoutp = rep->elect_timeout;
+			if ((diff_timeout = (int)(*timeoutp - timeout)) > 0)
+				TIMESPEC_ADD_DB_TIMEOUT(&exptime, diff_timeout);
+			else {
+				diff_timeout = -diff_timeout;
+				TIMESPEC_SUB_DB_TIMEOUT(&exptime, diff_timeout);
+			}
 			timeout = *timeoutp;
-			if (sleeptotal >= timeout)
-				done = 1;
-			else
-				sleeptime = SLEEPTIME(timeout);
+			sleeptime = SLEEPTIME(timeout);
 		}
 
 		if (egen != rep->egen || !FLD_ISSET(rep->elect_flags, flags))
 			done = 1;
 		REP_SYSTEM_UNLOCK(env);
-
-		if (done)
-			return (0);
 	}
 	return (0);
 }
diff --git a/src/rep/rep_lease.c b/src/rep/rep_lease.c
index 047c39a7..b6010046 100644
--- a/src/rep/rep_lease.c
+++ b/src/rep/rep_lease.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2007, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2007, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -45,10 +45,20 @@ __rep_update_grant(env, ts)
 	timespecclear(&mytime);
 
 	/*
+	 * If we are a view, we never grant a lease.
+	 */
+	if (IS_VIEW_SITE(env))
+		return (0);
+
+	/*
 	 * Get current time, and add in the (skewed) lease duration
-	 * time to send the grant to the master.
+	 * time to send the grant to the master.  We need to use '0'
+	 * for a non-monotonic (i.e. realtime) timestamp.  Some systems
+	 * use "time since boot" for monotonic time, which would not
+	 * work between machines here.  We already document that for leases,
+	 * the time cannot go backward.
 	 */
-	__os_gettime(env, &mytime, 1);
+	__os_gettime(env, &mytime, 0);
 	timespecadd(&mytime, &rep->lease_duration);
 	REP_SYSTEM_LOCK(env);
 	/*
@@ -108,7 +118,7 @@ __rep_islease_granted(env)
 	 * Get current time and compare against our granted lease.
 	 */
 	timespecclear(&mytime);
-	__os_gettime(env, &mytime, 1);
+	__os_gettime(env, &mytime, 0);
 
 	return (timespeccmp(&mytime, &rep->grant_expire, <=) ? 1 : 0);
 }
@@ -319,9 +329,15 @@ __rep_lease_check(env, refresh)
 		max_tries = LEASE_REFRESH_MIN;
 retry:
 	REP_SYSTEM_LOCK(env);
-	min_leases = rep->config_nsites / 2;
+	/*
+	 * We need enough leases so that we're guaranteed any successful
+	 * election will include at least one site with the lease-guaranteed
+	 * data.  Note this is based on total number of sites so leases
+	 * cannot be used with half or more unelectable sites.
+	 */
+	min_leases = (rep->config_nsites - 1) / 2;
 	ret = 0;
-	__os_gettime(env, &curtime, 1);
+	__os_gettime(env, &curtime, 0);
 	VPRINT(env, (env, DB_VERB_REP_LEASE,
 "%s %d of %d refresh %d min_leases %lu curtime %lu %lu, maxLSN [%lu][%lu]",
 	    "lease_check: try ", tries, max_tries, refresh,
@@ -526,7 +542,7 @@ __rep_lease_waittime(env)
 		if (!F_ISSET(rep, REP_F_LEASE_EXPIRED))
 			to = rep->lease_timeout;
 	} else {
-		__os_gettime(env, &mytime, 1);
+		__os_gettime(env, &mytime, 0);
 		RPRINT(env, (env, DB_VERB_REP_LEASE,
     "wait_time: mytime %lu %lu, grant_expire %lu %lu",
 		    (u_long)mytime.tv_sec, (u_long)mytime.tv_nsec,
diff --git a/src/rep/rep_log.c b/src/rep/rep_log.c
index 42300685..bf72db9e 100644
--- a/src/rep/rep_log.c
+++ b/src/rep/rep_log.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -110,7 +110,7 @@ __rep_allreq(env, rp, eid)
 	 */
 	if (ret == 0 && repth.lsn.file != 1 && flags == DB_FIRST) {
 		if (F_ISSET(rep, REP_F_CLIENT))
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 		else
 			(void)__rep_send_message(env, eid,
 			    REP_VERIFY_FAIL, &repth.lsn, NULL, 0, 0);
@@ -466,8 +466,8 @@ __rep_log_split(env, ip, rp, rec, ret_lsnp, last_lsnp)
 		if (p >= ep && save_flags)
 			F_SET(&tmprp, save_flags);
 		/*
-		 * A previous call to __rep_apply indicated an earlier
-		 * record is a dup and the next_new_lsn we are waiting for.
+		 * A previous call to __rep_apply indicated an earlier record
+		 * is a past dup and the next_new_lsn for which we are waiting.
 		 * Skip log records until we catch up with next_new_lsn.
 		 */
 		if (is_dup && LOG_COMPARE(&tmprp.lsn, &next_new_lsn) < 0) {
@@ -482,7 +482,20 @@ __rep_log_split(env, ip, rp, rec, ret_lsnp, last_lsnp)
 		VPRINT(env, (env, DB_VERB_REP_MISC,
 		    "log_split: rep_apply ret %d, dup %d, tmp_lsn [%lu][%lu]",
 		    ret, is_dup, (u_long)tmp_lsn.file, (u_long)tmp_lsn.offset));
-		if (is_dup)
+		/*
+		 * We can skip log records between a past dup and tmp_lsn
+		 * returned by rep_apply() because we know we have all
+		 * those log records.  For a past dup, this log record is
+		 * less than or equal to tmp_lsn (which is either ready_lsn
+		 * or max_perm_lsn) and we only have records to skip when
+		 * it is less than tmp_lsn.
+		 *
+		 * We cannot skip log records for a future dup because we
+		 * may not have all of them.  In this case, this log record
+		 * is greater than or equal to tmp_lsn (which is either
+		 * ready_lsn or this log record).
+		 */
+		if (is_dup && LOG_COMPARE(&tmprp.lsn, &tmp_lsn) < 0)
 			next_new_lsn = tmp_lsn;
 		switch (ret) {
 		/*
@@ -637,7 +650,7 @@ __rep_logreq(env, rp, rec, eid)
 		if (LOG_COMPARE(&firstlsn, &rp->lsn) > 0) {
 			/* Case 3 */
 			if (F_ISSET(rep, REP_F_CLIENT)) {
-				ret = DB_NOTFOUND;
+				ret = USR_ERR(env, DB_NOTFOUND);
 				goto err;
 			}
 			(void)__rep_send_message(env, eid,
@@ -662,7 +675,7 @@ __rep_logreq(env, rp, rec, eid)
 				ret = 0;
 				goto err;
 			} else
-				ret = DB_NOTFOUND;
+				ret = USR_ERR(env, DB_NOTFOUND);
 		}
 	}
 
@@ -812,6 +825,14 @@ __rep_loggap_req(env, rep, lsnp, gapflags)
 	ret = 0;
 
 	/*
+	 * If we are in SYNC_LOG and have all the log we need (i.e.
+	 * rep->last_lsn is ZERO_LSN), just return, as there is nothing
+	 * to do while recovery is running.
+	 */
+	if (rep->sync_state == SYNC_LOG && IS_ZERO_LSN(rep->last_lsn))
+		return (0);
+
+	/*
 	 * Check if we need to ask for the gap.
 	 * We ask for the gap if:
 	 *	We are forced to with gapflags.
@@ -1030,7 +1051,7 @@ __rep_chk_newfile(env, logc, rep, rp, eid)
 				    REP_VERIFY_FAIL, &rp->lsn,
 				    NULL, 0, 0);
 			} else
-				ret = DB_NOTFOUND;
+				ret = USR_ERR(env, DB_NOTFOUND);
 		} else {
 			endlsn.offset += logc->len;
 			if ((ret = __logc_version(logc,
@@ -1054,7 +1075,7 @@ __rep_chk_newfile(env, logc, rep, rp, eid)
 			}
 		}
 	} else
-		ret = DB_NOTFOUND;
+		ret = USR_ERR(env, DB_NOTFOUND);
 
 	return (ret);
 }
diff --git a/src/rep/rep_method.c b/src/rep/rep_method.c
index f9f1924c..e0e7dd19 100644
--- a/src/rep/rep_method.c
+++ b/src/rep/rep_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -10,6 +10,7 @@
 
 #include "db_int.h"
 #include "dbinc/db_page.h"
+#include "dbinc/blob.h"
 #include "dbinc/btree.h"
 #include "dbinc/mp.h"
 #include "dbinc/txn.h"
@@ -17,14 +18,12 @@
 static int  __rep_abort_prepared __P((ENV *));
 static int  __rep_await_condition __P((ENV *,
     struct rep_waitgoal *, db_timeout_t));
-static int  __rep_bt_cmp __P((DB *, const DBT *, const DBT *));
+static int  __rep_bt_cmp __P((DB *, const DBT *, const DBT *, size_t *));
 static int  __rep_check_applied __P((ENV *,
     DB_THREAD_INFO *, DB_COMMIT_INFO *, struct rep_waitgoal *));
 static void __rep_config_map __P((ENV *, u_int32_t *, u_int32_t *));
 static u_int32_t __rep_conv_vers __P((ENV *, u_int32_t));
-static int __rep_read_lsn_history __P((ENV *,
-    DB_THREAD_INFO *, DB_TXN **, DBC **, u_int32_t,
-    __rep_lsn_hist_data_args *, struct rep_waitgoal *, u_int32_t));
+static int  __rep_defview __P((DB_ENV *, const char *, int *, u_int32_t));
 static int  __rep_restore_prepared __P((ENV *));
 static int  __rep_save_lsn_hist __P((ENV *, DB_THREAD_INFO *, DB_LSN *));
 /*
@@ -123,9 +122,11 @@ __rep_get_config(dbenv, which, onp)
 #undef	OK_FLAGS
 #define	OK_FLAGS							\
     (DB_REP_CONF_AUTOINIT | DB_REP_CONF_AUTOROLLBACK |			\
-    DB_REP_CONF_BULK | DB_REP_CONF_DELAYCLIENT | DB_REP_CONF_INMEM |	\
+    DB_REP_CONF_BULK | DB_REP_CONF_DELAYCLIENT |			\
+    DB_REP_CONF_ELECT_LOGLENGTH | DB_REP_CONF_INMEM |			\
     DB_REP_CONF_LEASE | DB_REP_CONF_NOWAIT |				\
-    DB_REPMGR_CONF_2SITE_STRICT | DB_REPMGR_CONF_ELECTIONS)
+    DB_REPMGR_CONF_2SITE_STRICT | DB_REPMGR_CONF_ELECTIONS |		\
+    DB_REPMGR_CONF_PREFMAS_CLIENT | DB_REPMGR_CONF_PREFMAS_MASTER)
 
 	if (FLD_ISSET(which, ~OK_FLAGS))
 		return (__db_ferr(env, "DB_ENV->rep_get_config", 0));
@@ -171,19 +172,30 @@ __rep_set_config(dbenv, which, on)
 	REP *rep;
 	REP_BULK bulk;
 	u_int32_t mapped, orig;
-	int ret, t_ret;
+	int inmemlog, pm_ret, ret, t_ret;
 
 	env = dbenv->env;
 	db_rep = env->rep_handle;
 	ret = 0;
+	pm_ret = 0;
+	inmemlog = 0;
 
 #undef	OK_FLAGS
 #define	OK_FLAGS							\
     (DB_REP_CONF_AUTOINIT | DB_REP_CONF_AUTOROLLBACK |			\
-    DB_REP_CONF_BULK | DB_REP_CONF_DELAYCLIENT | DB_REP_CONF_INMEM |	\
+    DB_REP_CONF_BULK | DB_REP_CONF_DELAYCLIENT |			\
+    DB_REP_CONF_ELECT_LOGLENGTH | DB_REP_CONF_INMEM |			\
     DB_REP_CONF_LEASE | DB_REP_CONF_NOWAIT |				\
-    DB_REPMGR_CONF_2SITE_STRICT | DB_REPMGR_CONF_ELECTIONS)
-#define	REPMGR_FLAGS (REP_C_2SITE_STRICT | REP_C_ELECTIONS)
+    DB_REPMGR_CONF_2SITE_STRICT | DB_REPMGR_CONF_ELECTIONS |		\
+    DB_REPMGR_CONF_PREFMAS_CLIENT | DB_REPMGR_CONF_PREFMAS_MASTER)
+#define	REPMGR_FLAGS (REP_C_2SITE_STRICT | REP_C_ELECTIONS |		\
+    REP_C_PREFMAS_CLIENT | REP_C_PREFMAS_MASTER)
+
+#define	TURNING_ON_PREFMAS(orig, curr)					\
+    ((FLD_ISSET(curr, REP_C_PREFMAS_MASTER) &&				\
+    !FLD_ISSET(orig, REP_C_PREFMAS_MASTER)) ||				\
+    (FLD_ISSET(curr, REP_C_PREFMAS_CLIENT) &&				\
+    !FLD_ISSET(orig, REP_C_PREFMAS_CLIENT)))
 
 	ENV_NOT_CONFIGURED(
 	    env, db_rep->region, "DB_ENV->rep_set_config", DB_INIT_REP);
@@ -224,6 +236,62 @@ __rep_set_config(dbenv, which, on)
 			return (EINVAL);
 		}
 		/*
+		 * The undocumented ELECT_LOGLENGTH option and the preferred
+		 * master options cannot be changed after calling repmgr_start.
+		 */
+		if (FLD_ISSET(mapped, (REP_C_ELECT_LOGLENGTH |
+		    REP_C_PREFMAS_MASTER | REP_C_PREFMAS_CLIENT)) &&
+		    F_ISSET(rep, REP_F_START_CALLED)) {
+			__db_errx(env, DB_STR("3706",
+			    "DB_ENV->rep_set_config: %s "
+			    "must be configured before DB_ENV->repmgr_start"),
+			    FLD_ISSET(mapped, REP_C_ELECT_LOGLENGTH) ?
+			    "ELECT_LOGLENGTH" : "preferred master");
+			ENV_LEAVE(env, ip);
+			return (EINVAL);
+		}
+		/*
+		 * Do not allow users to turn on preferred master if
+		 * leases or in-memory replication files are in effect,
+		 * or with a private environment or in-memory log files.
+		 */
+		if (FLD_ISSET(mapped,
+		    (REP_C_PREFMAS_MASTER | REP_C_PREFMAS_CLIENT)) &&
+		    (REP_CONFIG_IS_SET(env, (REP_C_LEASE | REP_C_INMEM)) ||
+		    (__log_get_config(dbenv,
+		    DB_LOG_IN_MEMORY, &inmemlog) == 0 &&
+		    (inmemlog > 0 || F_ISSET(env, ENV_PRIVATE))))) {
+			__db_errx(env, DB_STR("3707",
+			    "DB_ENV->rep_set_config: preferred master mode "
+			    "cannot be used with %s"),
+			    REP_CONFIG_IS_SET(env, REP_C_LEASE) ?
+			    "master leases" :
+			    REP_CONFIG_IS_SET(env, REP_C_INMEM) ?
+			    "in-memory replication files" :
+			    inmemlog > 0 ? "in-memory log files" :
+			    "a private environment");
+			ENV_LEAVE(env, ip);
+			return (EINVAL);
+		}
+		/*
+		 * If we are already in preferred master mode, we can't
+		 * turn off elections or 2site_strict and we can't turn on
+		 * leases.
+		 */
+		if (PREFMAS_IS_SET(env) && ((FLD_ISSET(mapped,
+		    (REP_C_ELECTIONS | REP_C_2SITE_STRICT)) && on == 0) ||
+		    (FLD_ISSET(mapped, REP_C_LEASE) && on > 0))) {
+			__db_errx(env, DB_STR("3708",
+			    "DB_ENV->rep_set_config: cannot %s %s "
+			    "in preferred master mode"),
+			    on == 0 ? "disable" : "enable",
+			    FLD_ISSET(mapped, REP_C_ELECTIONS) ? "elections" :
+			    FLD_ISSET(mapped, REP_C_LEASE) ? "leases" :
+			    "2SITE_STRICT");
+			ENV_LEAVE(env, ip);
+			return (EINVAL);
+		}
+		/*
 		 * Leases must be turned on before calling rep_start.
 		 * Leases can never be turned off once they're turned on.
 		 */
@@ -252,6 +320,17 @@ __rep_set_config(dbenv, which, on)
 		else
 			FLD_CLR(rep->config, mapped);
 
+#ifdef HAVE_REPLICATION_THREADS
+		/* Do automatic preferred master configuration. */
+		if (TURNING_ON_PREFMAS(orig, rep->config) &&
+		    (pm_ret = __repmgr_prefmas_auto_config(dbenv,
+		    &rep->config)) != 0) {
+			REP_SYSTEM_UNLOCK(env);
+			MUTEX_UNLOCK(env, rep->mtx_clientdb);
+			ENV_LEAVE(env, ip);
+			goto prefmas_err;
+		}
+#endif
 		/*
 		 * Bulk transfer requires special processing if it is getting
 		 * toggled.
@@ -297,10 +376,25 @@ __rep_set_config(dbenv, which, on)
 			ret = t_ret;
 #endif
 	} else {
+		orig = db_rep->config;
 		if (on)
 			FLD_SET(db_rep->config, mapped);
 		else
 			FLD_CLR(db_rep->config, mapped);
+#ifdef HAVE_REPLICATION_THREADS
+		/* Do automatic preferred master configuration. */
+		if (TURNING_ON_PREFMAS(orig, db_rep->config))
+			pm_ret =
+			    __repmgr_prefmas_auto_config(dbenv,
+			    &db_rep->config);
+#endif
+	}
+prefmas_err:
+	if (pm_ret != 0) {
+		__db_errx(env, DB_STR("3709",
+		    "DB_ENV->rep_set_config: could not complete automatic "
+		    "preferred master configuration"));
+		ret = EINVAL;
 	}
 	/* Configuring 2SITE_STRICT, etc. makes this a repmgr application */
 	if (ret == 0 && FLD_ISSET(mapped, REPMGR_FLAGS))
@@ -331,6 +425,10 @@ __rep_config_map(env, inflagsp, outflagsp)
 		FLD_SET(*outflagsp, REP_C_DELAYCLIENT);
 		FLD_CLR(*inflagsp, DB_REP_CONF_DELAYCLIENT);
 	}
+	if (FLD_ISSET(*inflagsp, DB_REP_CONF_ELECT_LOGLENGTH)) {
+		FLD_SET(*outflagsp, REP_C_ELECT_LOGLENGTH);
+		FLD_CLR(*inflagsp, DB_REP_CONF_ELECT_LOGLENGTH);
+	}
 	if (FLD_ISSET(*inflagsp, DB_REP_CONF_INMEM)) {
 		FLD_SET(*outflagsp, REP_C_INMEM);
 		FLD_CLR(*inflagsp, DB_REP_CONF_INMEM);
@@ -351,6 +449,14 @@ __rep_config_map(env, inflagsp, outflagsp)
 		FLD_SET(*outflagsp, REP_C_ELECTIONS);
 		FLD_CLR(*inflagsp, DB_REPMGR_CONF_ELECTIONS);
 	}
+	if (FLD_ISSET(*inflagsp, DB_REPMGR_CONF_PREFMAS_CLIENT)) {
+		FLD_SET(*outflagsp, REP_C_PREFMAS_CLIENT);
+		FLD_CLR(*inflagsp, DB_REPMGR_CONF_PREFMAS_CLIENT);
+	}
+	if (FLD_ISSET(*inflagsp, DB_REPMGR_CONF_PREFMAS_MASTER)) {
+		FLD_SET(*outflagsp, REP_C_PREFMAS_MASTER);
+		FLD_CLR(*inflagsp, DB_REPMGR_CONF_PREFMAS_MASTER);
+	}
 	DB_ASSERT(env, *inflagsp == 0);
 }
 
@@ -368,8 +474,10 @@ __rep_start_pp(dbenv, dbt, flags)
 	DBT *dbt;
 	u_int32_t flags;
 {
-	DB_REP *db_rep;
 	ENV *env;
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
+	int ret;
 
 	env = dbenv->env;
 	db_rep = env->rep_handle;
@@ -400,7 +508,11 @@ __rep_start_pp(dbenv, dbt, flags)
 		return (EINVAL);
 	}
 
-	return (__rep_start_int(env, dbt, flags));
+	ENV_ENTER(env, ip);
+	ret = __rep_start_int(env, dbt, flags, 0);
+	ENV_LEAVE(env, ip);
+
+	return (ret);
 }
 
 /*
@@ -432,13 +544,14 @@ __rep_start_pp(dbenv, dbt, flags)
  * clients that reference non-existent files whose creation was backed out
  * during a synchronizing recovery.
  *
- * PUBLIC: int __rep_start_int __P((ENV *, DBT *, u_int32_t));
+ * PUBLIC: int __rep_start_int __P((ENV *, DBT *, u_int32_t, u_int32_t));
  */
 int
-__rep_start_int(env, dbt, flags)
+__rep_start_int(env, dbt, flags, startopts)
 	ENV *env;
 	DBT *dbt;
 	u_int32_t flags;
+	u_int32_t startopts;
 {
 	DB *dbp;
 	DB_LOG *dblp;
@@ -474,9 +587,31 @@ __rep_start_int(env, dbt, flags)
 		return (EINVAL);
 	}
 
-	ENV_ENTER(env, ip);
+	/*
+	 * If we are a view, we can never become master.
+	 */
+	if (IS_VIEW_SITE(env) && role == DB_REP_MASTER) {
+		__db_errx(env, DB_STR("3685",
+		    "View site cannot become master"));
+		return (EINVAL);
+	}
+
+	/*
+	 * Check for consistent view usage.  We need to check here rather
+	 * than in __rep_open because non-rep-aware processes such as
+	 * db_stat may open/join the environment.  Rep-aware handles must
+	 * consistently set the view.
+	 */
+	if ((ret = __rep_check_view(env)) != 0) {
+		RPRINT(env, (env, DB_VERB_REP_MISC,
+		    "Application env/view mismatch."));
+		__db_errx(env, DB_STR("3686",
+		    "Application environment and view callback mismatch"));
+		return (ret);
+	}
 
 	/* Serialize rep_start() calls. */
+	ENV_GET_THREAD_INFO(env, ip);
 	MUTEX_LOCK(env, rep->mtx_repstart);
 	start_th = 1;
 
@@ -492,8 +627,14 @@ __rep_start_int(env, dbt, flags)
 		goto out;
 
 	REP_SYSTEM_LOCK(env);
+	/*
+	 * The FORCE_ROLECHG option is used when a side-effect of the role
+	 * change such as incrementing the master gen is needed regardless
+	 * of the previous role.
+	 */
 	role_chg = (!F_ISSET(rep, REP_F_MASTER) && role == DB_REP_MASTER) ||
-	    (!F_ISSET(rep, REP_F_CLIENT) && role == DB_REP_CLIENT);
+	    (!F_ISSET(rep, REP_F_CLIENT) && role == DB_REP_CLIENT) ||
+	    FLD_ISSET(startopts, REP_START_FORCE_ROLECHG);
 
 	/*
 	 * There is no need for lockout if all we're doing is sending a message.
@@ -511,9 +652,11 @@ __rep_start_int(env, dbt, flags)
 		goto out;
 	}
 
-	if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) {
+	if (!FLD_ISSET(startopts, REP_START_WAIT_LOCKMSG) &&
+	    FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) {
 		/*
-		 * There is already someone in msg lockout.  Return.
+		 * There is already someone in msg lockout and we are not
+		 * waiting.  Return.
 		 */
 		RPRINT(env, (env, DB_VERB_REP_MISC,
 		    "Thread already in msg lockout"));
@@ -702,10 +845,15 @@ __rep_start_int(env, dbt, flags)
 		 *       now defunct on master.
 		 *   NEWFILE: Used to delay client apply during newfile
 		 *       operation, not applicable to master.
+		 *   READONLY_MASTER: Used to coordinate preferred master
+		 *       takeover, should not remain in effect after restart.
+		 *   HOLD_GEN: Freeze gen for preferred master, should not
+		 *       remain in effect after restart.
 		 */
 		F_CLR(rep, REP_F_CLIENT | REP_F_ABBREVIATED |
 		    REP_F_MASTERELECT | REP_F_SKIPPED_APPLY | REP_F_DELAY |
-		    REP_F_LEASE_EXPIRED | REP_F_NEWFILE);
+		    REP_F_LEASE_EXPIRED | REP_F_NEWFILE |
+		    REP_F_READONLY_MASTER | REP_F_HOLD_GEN);
 		/*
 		 * When becoming a master, set the following flags:
 		 *   MASTER: Indicate that this site is master.
@@ -842,11 +990,16 @@ __rep_start_int(env, dbt, flags)
 		}
 		/*
 		 * When becoming a client, clear the following flags:
+		 *   HOLD_GEN: Freeze gen for preferred master, should not
+		 *       remain in effect after restart.
 		 *   MASTER: Site is no longer a master.
 		 *   MASTERELECT: Indicates that a master is elected
 		 *       rather than appointed, not applicable on client.
+		 *   READONLY_MASTER: Used to coordinate preferred master
+		 *       takeover, should not remain in effect after restart.
 		 */
-		F_CLR(rep, REP_F_MASTER | REP_F_MASTERELECT);
+		F_CLR(rep, REP_F_HOLD_GEN | REP_F_MASTER | REP_F_MASTERELECT |
+		    REP_F_READONLY_MASTER);
 		F_SET(rep, REP_F_CLIENT);
 
 		/*
@@ -928,6 +1081,15 @@ __rep_start_int(env, dbt, flags)
 			 * sync with the master.
 			 */
 			SET_GEN(0);
+		/*
+		 * If we are changing role to client, reset our min log file
+		 * until we hear from a master or another client.  In
+		 * particular, in a dupmaster situation, if this site loses
+		 * an election a stale min_log_file would prevent archiving.
+		 */
+#ifdef HAVE_REPLICATION_THREADS
+		rep->min_log_file = 0;
+#endif
 		REP_SYSTEM_UNLOCK(env);
 
 		/*
@@ -935,6 +1097,15 @@ __rep_start_int(env, dbt, flags)
 		 */
 		if ((ret = __dbt_usercopy(env, dbt)) != 0)
 			goto out;
+		/*
+		 * The HOLD_CLIGEN option does not allow this client's
+		 * gen to change until the REP_F_HOLD_GEN flag is cleared.
+		 * It prevents this site from responding to NEWMASTER messages
+		 * and disables updating the gen from other incoming messages.
+		 */
+		if (FLD_ISSET(startopts, REP_START_HOLD_CLIGEN))
+			F_SET(rep, REP_F_HOLD_GEN);
+
 		(void)__rep_send_message(env,
 		    DB_EID_BROADCAST, REP_NEWCLIENT, NULL, dbt, 0, 0);
 	}
@@ -967,7 +1138,6 @@ out:
 	if (start_th)
 		MUTEX_UNLOCK(env, rep->mtx_repstart);
 	__dbt_userfree(env, dbt, NULL, NULL);
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -1170,6 +1340,9 @@ __rep_client_dbinit(env, startup, which)
 	if (which == REP_DB) {
 		name = REPDBNAME;
 		rdbpp = &db_rep->rep_db;
+	} else if (which == REP_BLOB) {
+		name = REPBLOBNAME;
+		rdbpp = &db_rep->blob_dbp;
 	} else {
 		name = REPPAGENAME;
 		rdbpp = &db_rep->file_dbp;
@@ -1209,16 +1382,28 @@ __rep_client_dbinit(env, startup, which)
 	if (which == REP_DB &&
 	    (ret = __bam_set_bt_compare(dbp, __rep_bt_cmp)) != 0)
 		goto err;
+	if (which == REP_BLOB &&	/* Need key and dup comparators. */
+	    ((ret = __bam_set_bt_compare(dbp, __rep_blob_cmp)) != 0 ||
+	    (ret = __db_set_dup_compare(dbp, __rep_offset_cmp)) != 0))
+		goto err;
 
 	/* Don't write log records on the client. */
 	if ((ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0)
 		goto err;
 
+	/* Blob gap processing requires sorted duplicates. */
+	if (which == REP_BLOB) {
+		if ((ret = __db_set_blob_threshold(dbp, 0, 0)) != 0)
+			goto err;
+		if ((ret = __db_set_flags(dbp, DB_DUPSORT)) != 0)
+			goto err;
+	}
+
 	flags = DB_NO_AUTO_COMMIT | DB_CREATE | DB_INTERNAL_TEMPORARY_DB |
 	    (F_ISSET(env, ENV_THREAD) ? DB_THREAD : 0);
 
 	if ((ret = __db_open(dbp, ip, NULL, fname, subdb,
-	    (which == REP_DB ? DB_BTREE : DB_RECNO),
+	    (which == REP_PG ? DB_RECNO : DB_BTREE),
 	    flags, 0, PGNO_BASE_MD)) != 0)
 		goto err;
 
@@ -1243,14 +1428,16 @@ err:		if (dbp != NULL &&
  * care about the LSNs.
  */
 static int
-__rep_bt_cmp(dbp, dbt1, dbt2)
+__rep_bt_cmp(dbp, dbt1, dbt2, locp)
 	DB *dbp;
 	const DBT *dbt1, *dbt2;
+	size_t *locp;
 {
 	DB_LSN lsn1, lsn2;
 	__rep_control_args *rp1, *rp2;
 
 	COMPQUIET(dbp, NULL);
+	COMPQUIET(locp, NULL);
 
 	rp1 = dbt1->data;
 	rp2 = dbt2->data;
@@ -1274,6 +1461,82 @@ __rep_bt_cmp(dbp, dbt1, dbt2)
 }
 
 /*
+ * __rep_blob_cmp --
+ *	Btree key comparator for the blob gap database.
+ *
+ * A key is a blob_sid followed directly by a blob_id, both db_seq_t.  Keys
+ * order by blob_sid first and by blob_id second.  Returns -1, 0 or 1 in the
+ * usual comparator sense.
+ *
+ * PUBLIC: int  __rep_blob_cmp __P((DB *, const DBT *, const DBT *, size_t *));
+ */
+int
+__rep_blob_cmp(dbp, dbt1, dbt2, locp)
+	DB *dbp;
+	const DBT *dbt1, *dbt2;
+	size_t *locp;
+{
+	db_seq_t id_a, id_b, sid_a, sid_b;
+	u_int8_t *key_a, *key_b;
+
+	COMPQUIET(dbp, NULL);
+	COMPQUIET(locp, NULL);
+
+	key_a = dbt1->data;
+	key_b = dbt2->data;
+
+	/*
+	 * The key bytes may not be suitably aligned for a db_seq_t, so copy
+	 * each field out rather than dereferencing a cast pointer.
+	 */
+	memcpy(&sid_a, key_a, sizeof(db_seq_t));
+	memcpy(&sid_b, key_b, sizeof(db_seq_t));
+	memcpy(&id_a, key_a + sizeof(db_seq_t), sizeof(db_seq_t));
+	memcpy(&id_b, key_b + sizeof(db_seq_t), sizeof(db_seq_t));
+
+	/* The blob_sid is the major sort field. */
+	if (sid_a != sid_b)
+		return (sid_a > sid_b ? 1 : -1);
+
+	/* Same blob_sid: the blob_id decides. */
+	if (id_a != id_b)
+		return (id_a > id_b ? 1 : -1);
+
+	return (0);
+}
+
+/*
+ * __rep_offset_cmp --
+ *	Duplicate comparator for the blob gap database.  Each data item
+ *	starts with an off_t; items order by that value.
+ *
+ * PUBLIC: int  __rep_offset_cmp
+ * PUBLIC:  __P((DB *, const DBT *, const DBT *, size_t *));
+ */
+int
+__rep_offset_cmp(dbp, dbt1, dbt2, locp)
+	DB *dbp;
+	const DBT *dbt1, *dbt2;
+	size_t *locp;
+{
+	off_t lhs, rhs;
+
+	COMPQUIET(locp, NULL);
+	COMPQUIET(dbp, NULL);
+
+	/* Copy the values out; the DBT data may not be off_t aligned. */
+	memcpy(&lhs, dbt1->data, sizeof(off_t));
+	memcpy(&rhs, dbt2->data, sizeof(off_t));
+
+	if (lhs < rhs)
+		return (-1);
+	if (lhs > rhs)
+		return (1);
+
+	return (0);
+}
+
+/*
  * __rep_abort_prepared --
  *	Abort any prepared transactions that recovery restored.
  *
@@ -1684,7 +1947,10 @@ __rep_set_nsites_pp(dbenv, n)
 "DB_ENV->rep_set_nsites: cannot call from Replication Manager application"));
 		return (EINVAL);
 	}
-	if ((ret = __rep_set_nsites_int(env, n)) == 0)
+	ENV_ENTER(env, ip);
+	ret = __rep_set_nsites_int(env, n);
+	ENV_LEAVE(env, ip);
+	if (ret == 0)
 		APP_SET_BASEAPI(env);
 	return (ret);
 }
@@ -1748,18 +2014,15 @@ __rep_get_nsites(dbenv, n)
 }
 
 /*
- * PUBLIC: int __rep_set_priority __P((DB_ENV *, u_int32_t));
+ * PUBLIC: int __rep_set_priority_pp __P((DB_ENV *, u_int32_t));
  */
 int
-__rep_set_priority(dbenv, priority)
+__rep_set_priority_pp(dbenv, priority)
 	DB_ENV *dbenv;
 	u_int32_t priority;
 {
 	DB_REP *db_rep;
 	ENV *env;
-	REP *rep;
-	u_int32_t prev;
-	int ret;
 
 	env = dbenv->env;
 	db_rep = env->rep_handle;
@@ -1767,6 +2030,30 @@ __rep_set_priority(dbenv, priority)
 	ENV_NOT_CONFIGURED(
 	    env, db_rep->region, "DB_ENV->rep_set_priority", DB_INIT_REP);
 
+	if (PREFMAS_IS_SET(env)) {
+		__db_errx(env, DB_STR_A("3710",
+"%s: cannot change priority in preferred master mode.",
+		    "%s"), "DB_ENV->rep_set_priority");
+		return (EINVAL);
+	}
+
+	return (__rep_set_priority_int(env, priority));
+}
+
+/*
+ * PUBLIC: int __rep_set_priority_int __P((ENV *, u_int32_t));
+ */
+int
+__rep_set_priority_int(env, priority)
+	ENV *env;
+	u_int32_t priority;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	u_int32_t prev;
+	int ret;
+
+	db_rep = env->rep_handle;
 	ret = 0;
 	if (REP_ON(env)) {
 		rep = db_rep->region;
@@ -1807,10 +2094,10 @@ __rep_get_priority(dbenv, priority)
 }
 
 /*
- * PUBLIC: int __rep_set_timeout __P((DB_ENV *, int, db_timeout_t));
+ * PUBLIC: int __rep_set_timeout_pp __P((DB_ENV *, int, db_timeout_t));
  */
 int
-__rep_set_timeout(dbenv, which, timeout)
+__rep_set_timeout_pp(dbenv, which, timeout)
 	DB_ENV *dbenv;
 	int which;
 	db_timeout_t timeout;
@@ -1818,13 +2105,10 @@ __rep_set_timeout(dbenv, which, timeout)
 	DB_REP *db_rep;
 	DB_THREAD_INFO *ip;
 	ENV *env;
-	REP *rep;
 	int repmgr_timeout, ret;
 
 	env = dbenv->env;
 	db_rep = env->rep_handle;
-	rep = db_rep->region;
-	ret = 0;
 	repmgr_timeout = 0;
 
 	if (timeout == 0 && (which == DB_REP_CONNECTION_RETRY ||
@@ -1850,12 +2134,46 @@ __rep_set_timeout(dbenv, which, timeout)
 		return (EINVAL);
 	}
 	if (which == DB_REP_LEASE_TIMEOUT && IS_REP_STARTED(env)) {
-		ret = EINVAL;
 		__db_errx(env, DB_STR_A("3568",
 "%s: lease timeout must be set before DB_ENV->rep_start.",
 		    "%s"), "DB_ENV->rep_set_timeout");
 		return (EINVAL);
 	}
+	if (PREFMAS_IS_SET(env) &&
+	    (which == DB_REP_HEARTBEAT_MONITOR ||
+	    which == DB_REP_HEARTBEAT_SEND) &&
+	    timeout == 0) {
+		__db_errx(env, DB_STR_A("3711",
+"%s: cannot turn off heartbeat timeout in preferred master mode.",
+		    "%s"), "DB_ENV->rep_set_timeout");
+		return (EINVAL);
+	}
+
+	ret = __rep_set_timeout_int(env, which, timeout);
+
+	/* Setting a repmgr timeout makes this a repmgr application */
+	if (ret == 0 && repmgr_timeout)
+		APP_SET_REPMGR(env);
+	return (ret);
+
+}
+
+/*
+ * PUBLIC: int __rep_set_timeout_int __P((ENV *, int, db_timeout_t));
+ */
+int
+__rep_set_timeout_int(env, which, timeout)
+	ENV *env;
+	int which;
+	db_timeout_t timeout;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	int ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	ret = 0;
 
 	switch (which) {
 	case DB_REP_CHECKPOINT_DELAY:
@@ -1888,6 +2206,7 @@ __rep_set_timeout(dbenv, which, timeout)
 			rep->ack_timeout = timeout;
 		else
 			db_rep->ack_timeout = timeout;
+		ADJUST_AUTOTAKEOVER_WAITS(db_rep, timeout);
 		break;
 	case DB_REP_CONNECTION_RETRY:
 		if (REP_ON(env))
@@ -1919,10 +2238,6 @@ __rep_set_timeout(dbenv, which, timeout)
 	    "Unknown timeout type argument to DB_ENV->rep_set_timeout"));
 		ret = EINVAL;
 	}
-
-	/* Setting a repmgr timeout makes this a repmgr application */
-	if (ret == 0 && repmgr_timeout)
-		APP_SET_REPMGR(env);
 	return (ret);
 }
 
@@ -2099,6 +2414,144 @@ __rep_set_request(dbenv, min, max)
 }
 
 /*
+ * __rep_set_view --
+ *	Install the view callback stored in db_rep->partial.  A NULL
+ *	callback selects __rep_defview.
+ *
+ * PUBLIC: int __rep_set_view __P((DB_ENV *,
+ * PUBLIC:     int (*)(DB_ENV *, const char *, int *, u_int32_t)));
+ */
+int
+__rep_set_view(dbenv, f_partial)
+	DB_ENV *dbenv;
+	int (*f_partial) __P((DB_ENV *,
+	    const char *, int *, u_int32_t));
+{
+	DB_REP *db_rep;
+	ENV *env;
+
+	env = dbenv->env;
+	db_rep = env->rep_handle;
+
+	/* Environment state checks (macros defined elsewhere). */
+	ENV_NOT_CONFIGURED(
+	    env, db_rep->region, "DB_ENV->rep_set_view", DB_INIT_REP);
+	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->rep_set_view");
+
+	/* With no callback supplied, fall back to the default view. */
+	db_rep->partial = (f_partial == NULL) ? __rep_defview : f_partial;
+
+	return (0);
+}
+
+/*
+ * __rep_defview --
+ *	View callback used by default: sets *result to 1 for every name.
+ */
+static int
+__rep_defview(dbenv, name, result, flags)
+	DB_ENV *dbenv;
+	const char *name;
+	int *result;
+	u_int32_t flags;
+{
+	*result = 1;
+	COMPQUIET(flags, 0);
+	COMPQUIET(name, NULL);
+	COMPQUIET(dbenv, NULL);
+	return (0);
+}
+
+/*
+ * __rep_call_partial --
+ *	Run the configured partial (view) callback for a file name.  Blob
+ *	meta databases and blob files are judged by the name of the database
+ *	that owns them, looked up through the blob file id in their path.
+ *	When that lookup fails the blob is pushed onto *lsp for later
+ *	processing and *result is set to 0.
+ *	Returns 0, or the error from the callback or a helper.
+ *
+ * PUBLIC: int __rep_call_partial
+ * PUBLIC:  __P((ENV *, const char *, int *, u_int32_t, DELAYED_BLOB_LIST **));
+ */
+int
+__rep_call_partial(env, name, result, flags, lsp)
+	ENV *env;
+	const char *name;
+	int *result;
+	u_int32_t flags;
+	DELAYED_BLOB_LIST **lsp;
+{
+	DB_LOG *dblp;
+	DB_REP *db_rep;
+	DELAYED_BLOB_LIST *delayed;
+	FNAME *owner;
+	db_seq_t blob_file_id;
+	char *owner_name;
+	int ret;
+
+	db_rep = env->rep_handle;
+	dblp = env->lg_handle;
+	owner = NULL;
+	blob_file_id = 0;
+
+	/*
+	 * The default view, and any name that is neither a blob meta
+	 * database nor a blob file, is passed to the callback as is.
+	 */
+	if (db_rep->partial == __rep_defview ||
+	    (!IS_BLOB_META(name) && !IS_BLOB_FILE(name)))
+		return (db_rep->partial(env->dbenv, name, result, flags));
+
+	/* The top level blob meta database must always be replicated. */
+	if (strcmp(name, BLOB_META_FILE_NAME) == 0) {
+		*result = 1;
+		return (0);
+	}
+
+	/* Extract the blob file id from the path. */
+	if ((ret = __blob_path_to_dir_ids(
+	    env, name, &blob_file_id, NULL)) != 0)
+		return (ret);
+	DB_ASSERT(env, blob_file_id > 0);
+
+	/*
+	 * The database that owns this blob meta database may not have been
+	 * processed on the client yet when the transaction is processed.  In
+	 * that case assume it is not replicated, and hand its id back so it
+	 * can be revisited once the owning database is processed (which
+	 * must happen in the same transaction).
+	 */
+	if (__dbreg_blob_file_to_fname(
+	    dblp, blob_file_id, 0, &owner) != 0) {
+		if ((ret = __os_malloc(
+		    env, sizeof(DELAYED_BLOB_LIST), &delayed)) != 0)
+			return (ret);
+		memset(delayed, 0, sizeof(DELAYED_BLOB_LIST));
+		delayed->blob_file_id = blob_file_id;
+
+		/* Link the new entry in at the head of the list. */
+		if (*lsp != NULL) {
+			delayed->next = *lsp;
+			(*lsp)->prev = delayed;
+		}
+		*lsp = delayed;
+
+		*result = 0;
+		return (0);
+	}
+
+	/* Ask the callback about the owning database's name. */
+	if (owner->fname_off == INVALID_ROFF)
+		owner_name = NULL;
+	else
+		owner_name = R_ADDR(&dblp->reginfo, owner->fname_off);
+	DB_ASSERT(env, owner_name != NULL);
+
+	return (db_rep->partial(env->dbenv, owner_name, result, flags));
+}
+
+/*
  * __rep_set_transport_pp --
  *	Set the transport function for replication.
  *
@@ -2288,25 +2741,46 @@ __rep_set_clockskew(dbenv, fast_clock, slow_clock)
 }
 
 /*
- * __rep_flush --
+ * __rep_flush_pp --
  *	Re-push the last log record to all clients, in case they've lost
  *	messages and don't know it.
  *
- * PUBLIC: int __rep_flush __P((DB_ENV *));
+ * PUBLIC: int __rep_flush_pp __P((DB_ENV *));
  */
 int
-__rep_flush(dbenv)
+__rep_flush_pp (dbenv)
 	DB_ENV *dbenv;
 {
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	int ret;
+
+	env = dbenv->env;
+
+	ENV_ENTER(env, ip);
+	ret = __rep_flush_int(env);
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * __rep_flush_int --
+ *	Re-push the last log record to all clients, in case they've lost
+ *	messages and don't know it.
+ *
+ * PUBLIC: int __rep_flush_int __P((ENV *));
+ */
+int
+__rep_flush_int(env)
+	ENV *env;
+{
 	DBT rec;
 	DB_LOGC *logc;
 	DB_LSN lsn;
 	DB_REP *db_rep;
-	DB_THREAD_INFO *ip;
-	ENV *env;
 	int ret, t_ret;
 
-	env = dbenv->env;
 	db_rep = env->rep_handle;
 
 	ENV_REQUIRES_CONFIG_XX(
@@ -2322,8 +2796,6 @@ __rep_flush(dbenv)
 		return (EINVAL);
 	}
 
-	ENV_ENTER(env, ip);
-
 	if ((ret = __log_cursor(env, &logc)) != 0)
 		return (ret);
 
@@ -2338,7 +2810,6 @@ __rep_flush(dbenv)
 
 err:	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
 		ret = t_ret;
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -2693,7 +3164,7 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 	 */
 	if (commit_info->gen == gen) {
 		ret = __rep_read_lsn_history(env,
-		    ip, &txn, &dbc, gen, &hist, reasonp, DB_SET);
+		    ip, &txn, &dbc, gen, &hist, reasonp, DB_SET, 1);
 		if (ret == DB_NOTFOUND) {
 			/*
 			 * We haven't yet received the LSN history of the
@@ -2720,7 +3191,7 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 			 * masters at the same gen, and the txn of interest was
 			 * rolled back.
 			 */
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 			goto out;
 		}
 
@@ -2750,7 +3221,7 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 		 * description of the txn of interest doesn't match what we see
 		 * in the history available to us now.
 		 */
-		ret = DB_NOTFOUND;
+		ret = USR_ERR(env, DB_NOTFOUND);
 
 	} else if (commit_info->gen < gen || gen == 0) {
 		/*
@@ -2759,10 +3230,10 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 		 * the token LSN is within the close/open range defined by
 		 * [base,next).
 		 */
-		ret = __rep_read_lsn_history(env,
-		    ip, &txn, &dbc, commit_info->gen, &hist, reasonp, DB_SET);
-		t_ret = __rep_read_lsn_history(env,
-		    ip, &txn, &dbc, commit_info->gen, &hist2, reasonp, DB_NEXT);
+		ret = __rep_read_lsn_history(env, ip,
+		    &txn, &dbc, commit_info->gen, &hist, reasonp, DB_SET, 1);
+		t_ret = __rep_read_lsn_history(env, ip,
+		    &txn, &dbc, commit_info->gen, &hist2, reasonp, DB_NEXT, 1);
 		if (ret == DB_NOTFOUND) {
 			/*
 			 * If the desired gen is not in our database, it could
@@ -2812,7 +3283,7 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 			 * don't match, meaning the txn was written at a dup
 			 * master and that gen instance was rolled back.
 			 */
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 			goto out;
 		}
 
@@ -2837,7 +3308,7 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 		    LOG_COMPARE(&commit_info->lsn, &hist2.lsn) < 0)
 			ret = 0;
 		else
-			ret = DB_NOTFOUND;
+			ret = USR_ERR(env, DB_NOTFOUND);
 	} else {
 		/*
 		 * Token names a future gen.  If we're a client and the LSN also
@@ -2851,7 +3322,7 @@ __rep_check_applied(env, ip, commit_info, reasonp)
 			reasonp->u.gen = commit_info->gen;
 			return (DB_TIMEOUT);
 		}
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 	}
 
 out:
@@ -2867,9 +3338,19 @@ out:
 /*
  * The txn and dbc handles are owned by caller, though we create them if
  * necessary.  Caller is responsible for closing them.
+ *
+ * The use_cache option is enabled for the read-your-writes feature, which
+ * makes frequent requests for the cached information (envid and lsn) when it
+ * is in use.  Callers that require information that is not cached (e.g.
+ * timestamp) should not set use_cache.
+ *
+ * PUBLIC: int __rep_read_lsn_history __P((ENV *, DB_THREAD_INFO *, DB_TXN **,
+ * PUBLIC:    DBC **, u_int32_t, __rep_lsn_hist_data_args *,
+ * PUBLIC:    struct rep_waitgoal *, u_int32_t, int));
  */
-static int
-__rep_read_lsn_history(env, ip, txn, dbc, gen, gen_infop, reasonp, flags)
+int
+__rep_read_lsn_history(env, ip, txn, dbc, gen, gen_infop, reasonp, flags,
+    use_cache)
 	ENV *env;
 	DB_THREAD_INFO *ip;
 	DB_TXN **txn;
@@ -2878,6 +3359,7 @@ __rep_read_lsn_history(env, ip, txn, dbc, gen, gen_infop, reasonp, flags)
 	__rep_lsn_hist_data_args *gen_infop;
 	struct rep_waitgoal *reasonp;
 	u_int32_t flags;
+	int use_cache;
 {
 	DB_REP *db_rep;
 	REP *rep;
@@ -2898,7 +3380,8 @@ __rep_read_lsn_history(env, ip, txn, dbc, gen, gen_infop, reasonp, flags)
 	/* Simply return cached info, if we already have it. */
 	desired_gen = flags == DB_SET ? gen : gen + 1;
 	REP_SYSTEM_LOCK(env);
-	if (rep->gen == desired_gen && !IS_ZERO_LSN(rep->gen_base_lsn)) {
+	if (use_cache && rep->gen == desired_gen &&
+	    !IS_ZERO_LSN(rep->gen_base_lsn)) {
 		gen_infop->lsn = rep->gen_base_lsn;
 		gen_infop->envid = rep->master_envid;
 		goto unlock;
@@ -3005,8 +3488,14 @@ __rep_conv_vers(env, log_ver)
 
 	/*
 	 * We can't use a switch statement, some of the DB_LOGVERSION_XX
-	 * constants are the same
+	 * constants are the same.
 	 */
+	if (log_ver == DB_LOGVERSION_61)
+		return (DB_REPVERSION_61);
+	if (log_ver == DB_LOGVERSION_60p1)
+		return (DB_REPVERSION_60);
+	if (log_ver == DB_LOGVERSION_60)
+		return (DB_REPVERSION_60);
 	if (log_ver == DB_LOGVERSION_53)
 		return (DB_REPVERSION_53);
 	if (log_ver == DB_LOGVERSION_52)
diff --git a/src/rep/rep_record.c b/src/rep/rep_record.c
index f4691974..b206e60e 100644
--- a/src/rep/rep_record.c
+++ b/src/rep/rep_record.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,13 +9,17 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_am.h"
 #include "dbinc/lock.h"
 #include "dbinc/mp.h"
 #include "dbinc/txn.h"
 
-static int __rep_collect_txn __P((ENV *, DB_LSN *, LSN_COLLECTION *));
+static int __rep_collect_txn
+    __P((ENV *, DB_LSN *, LSN_COLLECTION *, DELAYED_BLOB_LIST **));
+static int __rep_remove_delayed_blobs
+    __P((ENV *, db_seq_t, u_int32_t ,DELAYED_BLOB_LIST **));
 static int __rep_do_ckp __P((ENV *, DBT *, __rep_control_args *));
 static int __rep_fire_newmaster __P((ENV *, u_int32_t, int));
 static int __rep_fire_startupdone __P((ENV *, u_int32_t, int));
@@ -153,6 +157,7 @@ __rep_process_message_pp(dbenv, control, rec, eid, ret_lsnp)
 	DB_LSN *ret_lsnp;
 {
 	ENV *env;
+	DB_THREAD_INFO *ip;
 	int ret;
 
 	env = dbenv->env;
@@ -193,7 +198,9 @@ __rep_process_message_pp(dbenv, control, rec, eid, ret_lsnp)
 		return (ret);
 	}
 
+	ENV_ENTER(env, ip);
 	ret = __rep_process_message_int(env, control, rec, eid, ret_lsnp);
+	ENV_LEAVE(env, ip);
 
 	__dbt_userfree(env, control, rec, NULL);
 	return (ret);
@@ -289,8 +296,7 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
 	if (ret_lsnp != NULL)
 		ZERO_LSN(*ret_lsnp);
 
-	ENV_ENTER(env, ip);
-
+	ENV_GET_THREAD_INFO(env, ip);
 	REP_PRINT_MESSAGE(env, eid, rp, "rep_process_message", 0);
 	/*
 	 * Check the version number for both rep and log.  If it is
@@ -303,8 +309,7 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
 			    "%lu %d"), (u_long)rp->rep_version,
 			    DB_REPVERSION_MIN);
 
-			ret = EINVAL;
-			goto errlock;
+			return (EINVAL);
 		}
 		VPRINT(env, (env, DB_VERB_REP_MSGS,
 		    "Received record %lu with old rep version %lu",
@@ -322,8 +327,7 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
 		__db_errx(env, DB_STR_A("3517",
 		    "unexpected replication message version %lu, expected %d",
 		    "%lu %d"), (u_long)rp->rep_version, DB_REPVERSION);
-		ret = EINVAL;
-		goto errlock;
+		return (EINVAL);
 	}
 
 	if (rp->log_version < DB_LOGVERSION) {
@@ -332,8 +336,7 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
  "unsupported old replication log version %lu, minimum version %d",
 			    "%lu %d"), (u_long)rp->log_version,
 			    DB_LOGVERSION_MIN);
-			ret = EINVAL;
-			goto errlock;
+			return (EINVAL);
 		}
 		VPRINT(env, (env, DB_VERB_REP_MSGS,
 		    "Received record %lu with old log version %lu",
@@ -342,8 +345,7 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
 		__db_errx(env, DB_STR_A("3519",
 		    "unexpected log record version %lu, expected %d",
 		    "%lu %d"), (u_long)rp->log_version, DB_LOGVERSION);
-		ret = EINVAL;
-		goto errlock;
+		return (EINVAL);
 	}
 
 	/*
@@ -465,9 +467,14 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
 		 * accept the generation number and participate in future
 		 * elections and communication. Otherwise, I need to hear about
 		 * a new master and sync up.
+		 *
+		 * But do not do any of this if REP_F_HOLD_GEN is set.  In
+		 * this case we keep the site at its current gen until we
+		 * clear this flag.
 		 */
-		if (rp->rectype == REP_ALIVE ||
-		    rp->rectype == REP_VOTE1 || rp->rectype == REP_VOTE2) {
+		if ((rp->rectype == REP_ALIVE ||
+		    rp->rectype == REP_VOTE1 || rp->rectype == REP_VOTE2) &&
+		    !F_ISSET(rep, REP_F_HOLD_GEN)) {
 			REP_SYSTEM_LOCK(env);
 			RPRINT(env, (env, DB_VERB_REP_MSGS,
 			    "Updating gen from %lu to %lu",
@@ -593,6 +600,38 @@ __rep_process_message_int(env, control, rec, eid, ret_lsnp)
 		ret = __rep_allreq(env, rp, eid);
 		CLIENT_REREQ;
 		break;
+	case REP_BLOB_ALL_REQ:
+		/* Blobs do not support peer-to-peer. */
+		RECOVERING_SKIP;
+		MASTER_ONLY(rep, rp);
+		ret = __rep_blob_allreq(env, eid, rec);
+		CLIENT_REREQ;
+		break;
+	case REP_BLOB_CHUNK:
+		/* Handle even if in recovery. */
+		CLIENT_ONLY(rep, rp);
+		ret = __rep_blob_chunk(env, eid, ip, rec);
+		if (ret == DB_REP_PAGEDONE)
+			ret = 0;
+		break;
+	case REP_BLOB_CHUNK_REQ:
+		/* Blobs do not support peer-to-peer. */
+		RECOVERING_SKIP;
+		MASTER_ONLY(rep, rp);
+		ret = __rep_blob_chunk_req(env, eid, rec);
+		CLIENT_REREQ;
+		break;
+	case REP_BLOB_UPDATE:
+		CLIENT_ONLY(rep, rp);
+		ret = __rep_blob_update(env, eid, ip, rec);
+		break;
+	case REP_BLOB_UPDATE_REQ:
+		MASTER_ONLY(rep, rp);
+		infop = env->reginfo;
+		renv = infop->primary;
+		MASTER_UPDATE(env, renv);
+		ret = __rep_blob_update_req(env, ip, rec);
+		break;
 	case REP_BULK_LOG:
 		RECOVERING_LOG_SKIP;
 		CLIENT_ONLY(rep, rp);
@@ -1059,8 +1098,6 @@ out:
 			*ret_lsnp = rp->lsn;
 		ret = DB_REP_NOTPERM;
 	}
-	__dbt_userfree(env, control, rec, NULL);
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -1290,8 +1327,24 @@ gap_check:
 #endif
 		}
 
-		if (ret == DB_KEYEXIST)
+		if (ret == DB_KEYEXIST) {
+			STAT(rep->stat.st_log_duplicated++);
+#ifdef	CONFIG_TEST
+			STAT(rep->stat.st_log_futuredup++);
+#endif
+			if (is_dupp != NULL) {
+				*is_dupp = 1;
+				/*
+				 * Could get overwritten by max_lsn later,
+				 * but only when returning NOTPERM for a
+				 * REPCTL_PERM record, in which case max_lsn
+				 * is this log record.
+				 */
+				if (ret_lsnp != NULL)
+					*ret_lsnp = lp->ready_lsn;
+			}
 			ret = 0;
+		}
 		if (ret != 0 && ret != ENOMEM)
 			goto done;
 
@@ -1337,10 +1390,11 @@ gap_check:
 			 * But max_lsn is guaranteed <= ready_lsn, so
 			 * it would be a more conservative LSN to return.
 			 */
-			*ret_lsnp = lp->ready_lsn;
+			if (ret_lsnp != NULL)
+				*ret_lsnp = lp->ready_lsn;
 		}
 		LOGCOPY_32(env, &rectype, rec->data);
-		if (rectype == DB___txn_regop || rectype == DB___txn_ckp)
+		if (IS_PERM_RECTYPE(rectype))
 			max_lsn = lp->max_perm_lsn;
 		/*
 		 * We check REPCTL_LEASE here, because this client may
@@ -1536,6 +1590,7 @@ __rep_process_txn(env, rec)
 	DB_REP *db_rep;
 	DB_THREAD_INFO *ip;
 	DB_TXNHEAD *txninfo;
+	DELAYED_BLOB_LIST *dblp, *dummy;
 	LSN_COLLECTION lc;
 	REP *rep;
 	__txn_regop_args *txn_args;
@@ -1548,12 +1603,12 @@ __rep_process_txn(env, rec)
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
 	logc = NULL;
+	dblp = dummy = NULL;
 	txn_args = NULL;
 	txn42_args = NULL;
 	prep_args = NULL;
 	txninfo = NULL;
 
-	ENV_ENTER(env, ip);
 	memset(&data_dbt, 0, sizeof(data_dbt));
 	if (F_ISSET(env, ENV_THREAD))
 		F_SET(&data_dbt, DB_DBT_REALLOC);
@@ -1618,8 +1673,19 @@ __rep_process_txn(env, rec)
 		goto err;
 
 	/* Phase 1.  Get a list of the LSNs in this transaction, and sort it. */
-	if ((ret = __rep_collect_txn(env, &prev_lsn, &lc)) != 0)
+	if ((ret = __rep_collect_txn(env, &prev_lsn, &lc, &dblp)) != 0)
 		goto err;
+	/* Deal with any child transactions that had to be delayed. */
+	while (dblp != NULL) {
+		if ((ret = __rep_collect_txn(
+		    env, &dblp->lsn, &lc, &dummy)) != 0)
+			goto err;
+		DB_ASSERT(env, dummy == NULL);
+		dummy = dblp;
+		dblp = dummy->next;
+		__os_free(env, dummy);
+		dummy = NULL;
+	}
 	qsort(lc.array, lc.nlsns, sizeof(DB_LSN), __rep_lsn_cmp);
 
 	/*
@@ -1627,6 +1693,7 @@ __rep_process_txn(env, rec)
 	 * records.  Create a txnlist so that they can keep track of file
 	 * state between records.
 	 */
+	ENV_GET_THREAD_INFO(env, ip);
 	if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0)
 		goto err;
 
@@ -1647,6 +1714,7 @@ __rep_process_txn(env, rec)
 			    (u_long)lsnp->file, (u_long)lsnp->offset);
 			goto err;
 		}
+		LOGCOPY_32(env, &rectype, data_dbt.data);
 	}
 
 err:	memset(&req, 0, sizeof(req));
@@ -1658,6 +1726,12 @@ err:	memset(&req, 0, sizeof(req));
 	if ((t_ret = __lock_id_free(env, locker)) != 0 && ret == 0)
 		ret = t_ret;
 
+	while (dblp != NULL) {
+		dummy = dblp;
+		dblp = dummy->next;
+		__os_free(env, dummy);
+	}
+
 err1:	if (txn_args != NULL)
 		__os_free(env, txn_args);
 	if (txn42_args != NULL)
@@ -1694,25 +1768,52 @@ err1:	if (txn_args != NULL)
  *	the entire transaction family at once.
  */
 static int
-__rep_collect_txn(env, lsnp, lc)
+__rep_collect_txn(env, lsnp, lc, dbl)
 	ENV *env;
 	DB_LSN *lsnp;
 	LSN_COLLECTION *lc;
+	DELAYED_BLOB_LIST **dbl;
 {
+	__dbreg_register_args *dbregargp;
 	__txn_child_args *argp;
 	DB_LOGC *logc;
 	DB_LSN c_lsn;
+	DB_REP *db_rep;
 	DBT data;
-	u_int32_t rectype;
+	db_seq_t blob_file_id;
+	u_int32_t child, rectype, skip_txnid;
 	u_int nalloc;
-	int ret, t_ret;
+	int ret, t_ret, view_partial;
+	char *name;
 
 	memset(&data, 0, sizeof(data));
 	F_SET(&data, DB_DBT_REALLOC);
+	skip_txnid = TXN_INVALID;
 
 	if ((ret = __log_cursor(env, &logc)) != 0)
 		return (ret);
 
+	/*
+	 * For partial replication we assume a certain sequence of
+	 * log records to detect a database create and skip it if
+	 * desired.  We are walking backward through the records of
+	 * a single transaction right now.
+	 *
+	 * A create operation is done inside a BDB-owned child txn.
+	 * Nothing else is done within this BDB-owned child txn.
+	 * The last piece of a create operation is the dbreg_register
+	 * log record that records the opening of the file.  That
+	 * log record contains the child txnid in the 'id' field, and
+	 * the file name.  At this point we invoke the partial callback
+	 * to determine if this database should be replicated.  If it
+	 * should not be replicated, we need to avoid collecting the
+	 * entire child txn referenced in the 'id' field.
+	 *
+	 * So if processing the dbreg_register record finds a database
+	 * to skip, we store the child txnid in 'skip_txnid'.  We use
+	 * 'skip_txnid' to avoid processing log records or making
+	 * recursive calls for that txnid.
+	 */
 	while (!IS_ZERO_LSN(*lsnp) &&
 	    (ret = __logc_get(logc, lsnp, &data, DB_SET)) == 0) {
 		LOGCOPY_32(env, &rectype, data.data);
@@ -1722,9 +1823,66 @@ __rep_collect_txn(env, lsnp, lc)
 				goto err;
 			c_lsn = argp->c_lsn;
 			*lsnp = argp->prev_lsn;
+			child = argp->child;
 			__os_free(env, argp);
-			ret = __rep_collect_txn(env, &c_lsn, lc);
-		} else {
+
+			if (child == skip_txnid && *dbl != NULL &&
+			    (*dbl)->child == child)
+				(*dbl)->lsn = c_lsn;
+			/*
+			 * If skip_txnid is set, it is the id of the child txnid
+			 * that creates a database we should skip.  So, if
+			 * this is that child txn, do not collect it.
+			 */
+			if (skip_txnid == TXN_INVALID || child != skip_txnid)
+				ret = __rep_collect_txn(env, &c_lsn, lc, dbl);
+		} else if (IS_VIEW_SITE(env) &&
+		    rectype == DB___dbreg_register) {
+			db_rep = env->rep_handle;
+			/*
+			 * If we are a view see if this is a file creation
+			 * stream.  On-disk files have the creating child txn
+			 * in the 'id' field and the name.  See if this view
+			 * wants this file.
+			 */
+			if ((ret = __dbreg_register_read(
+			    env, data.data, &dbregargp)) != 0)
+				goto err;
+			child = dbregargp->id;
+			name = (char *)dbregargp->name.data;
+			skip_txnid = TXN_INVALID;
+			if (child != TXN_INVALID &&
+			    (!IS_DB_FILE(name) || IS_BLOB_META(name))) {
+				/*
+				 * The 'id' has a child txn so it is a create.
+				 */
+				DB_ASSERT(env, db_rep->partial != NULL);
+				GET_LO_HI(env, dbregargp->blob_fid_lo,
+				    dbregargp->blob_fid_hi, blob_file_id, ret);
+				if (ret != 0)
+					goto err;
+				if ((ret = __rep_call_partial(env,
+				    name, &view_partial, 0, dbl)) != 0) {
+					VPRINT(env, (env, DB_VERB_REP_MISC,
+		    "rep_collect_txn: partial cb err %d for %s", ret, name));
+					__os_free(env, dbregargp);
+					goto err;
+				}
+				/*
+				 * Save the child txnid for when we walk back
+				 * into the txn_child record.
+				 */
+				if (view_partial == 0) {
+					skip_txnid = child;
+					if ((ret =
+					    __rep_remove_delayed_blobs(env,
+					    blob_file_id, child, dbl)) != 0)
+						goto err;
+				}
+			}
+			__os_free(env, dbregargp);
+		}
+		if (rectype != DB___txn_child) {
 			if (lc->nalloc < lc->nlsns + 1) {
 				nalloc = lc->nalloc == 0 ? 20 : lc->nalloc * 2;
 				if ((ret = __os_realloc(env,
@@ -1761,6 +1919,62 @@ err:	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
 }
 
 /*
+ * __rep_remove_delayed_blobs --
+ *
+ * A blob meta database opened in the same transaction as the database that
+ * owns it cannot be judged for replication until the rest of the transaction
+ * is processed, so its information is kept on a DELAYED_BLOB_LIST.  This is
+ * called for a database that is not replicated: a head entry with no child
+ * txnid is given 'child'; otherwise, for a non-zero blob_file_id, the first
+ * entry with that id and a different child is freed.  Always returns 0.
+ */
+static int
+__rep_remove_delayed_blobs(env, blob_file_id, child, dbl)
+	ENV *env;
+	db_seq_t blob_file_id;
+	u_int32_t child;
+	DELAYED_BLOB_LIST **dbl;
+{
+	DELAYED_BLOB_LIST *ent, *next, *prev;
+
+	if (*dbl == NULL)
+		return (0);
+
+	/*
+	 * If the child transaction has not been set, then a new entry was just
+	 * added to the list.  Record this child in it and stop.
+	 */
+	if ((*dbl)->child == 0) {
+		(*dbl)->child = child;
+		return (0);
+	}
+
+	if (blob_file_id == 0)
+		return (0);
+
+	/*
+	 * This blob meta database should not be replicated if its associated
+	 * database is not replicated.  Unlink it from the delayed list, fixing
+	 * the head and neighbor links, so it will not be processed later.
+	 */
+	for (ent = *dbl; ent != NULL; ent = (DELAYED_BLOB_LIST *)ent->next) {
+		if (ent->blob_file_id == blob_file_id && ent->child != child) {
+			next = (DELAYED_BLOB_LIST *)ent->next;
+			prev = (DELAYED_BLOB_LIST *)ent->prev;
+			if (ent == *dbl)
+				*dbl = next;
+			if (prev != NULL)
+				prev->next = ent->next;
+			if (next != NULL)
+				next->prev = ent->prev;
+			__os_free(env, ent);
+			break;
+		}
+	}
+	return (0);
+}
+
+/*
  * __rep_lsn_cmp --
  *	qsort-type-compatible wrapper for LOG_COMPARE.
  */
@@ -2138,9 +2352,13 @@ __rep_process_rec(env, ip, rp, rec, ret_tsp, ret_lsnp)
 				ret = __rep_process_txn(env, rec);
 		} while (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED);
 
-		/* Now flush the log unless we're running TXN_NOSYNC. */
-		if (ret == 0 && !F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC))
-			ret = __log_flush(env, NULL);
+		/* Now write/flush the log as appropriate. */
+		if (ret == 0) {
+			if (F_ISSET(env->dbenv, DB_ENV_TXN_WRITE_NOSYNC))
+				ret = __log_rep_write(env);
+			else if (!F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC))
+				ret = __log_flush(env, NULL);
+		}
 		if (ret != 0) {
 			__db_errx(env, DB_STR_A("3526",
 			    "Error processing txn [%lu][%lu]", "%lu %lu"),
@@ -2256,7 +2474,7 @@ __rep_resend_req(env, rereq)
 	DB_REP *db_rep;
 	LOG *lp;
 	REP *rep;
-	int master, ret;
+	int blob_sync, master, ret;
 	repsync_t sync_state;
 	u_int32_t gapflags, msgtype, repflags, sendflags;
 
@@ -2271,6 +2489,7 @@ __rep_resend_req(env, rereq)
 
 	repflags = rep->flags;
 	sync_state = rep->sync_state;
+	blob_sync = rep->blob_sync;
 	/*
 	 * If we are delayed we do not rerequest anything.
 	 */
@@ -2293,9 +2512,17 @@ __rep_resend_req(env, rereq)
 		 */
 		msgtype = REP_UPDATE_REQ;
 	} else if (sync_state == SYNC_PAGE) {
-		REP_SYSTEM_LOCK(env);
-		ret = __rep_pggap_req(env, rep, NULL, gapflags);
-		REP_SYSTEM_UNLOCK(env);
+		if (blob_sync == 0) {
+			REP_SYSTEM_LOCK(env);
+			ret = __rep_pggap_req(env, rep, NULL, gapflags);
+			REP_SYSTEM_UNLOCK(env);
+		} else {
+			MUTEX_LOCK(env, rep->mtx_clientdb);
+			REP_SYSTEM_LOCK(env);
+			ret = __rep_blob_rereq(env, rep);
+			REP_SYSTEM_UNLOCK(env);
+			MUTEX_UNLOCK(env, rep->mtx_clientdb);
+		}
 	} else {
 		MUTEX_LOCK(env, rep->mtx_clientdb);
 		ret = __rep_loggap_req(env, rep, NULL, gapflags);
@@ -2397,9 +2624,20 @@ __rep_skip_msg(env, rep, eid, rectype)
 		if (rep->master_id == DB_EID_INVALID)	/* Case 1. */
 			(void)__rep_send_message(env,
 			    DB_EID_BROADCAST, REP_MASTER_REQ, NULL, NULL, 0, 0);
-		else if (eid == rep->master_id)		/* Case 2. */
-			ret = __rep_resend_req(env, 0);
-		else if (F_ISSET(rep, REP_F_CLIENT))	/* Case 3. */
+		else if (eid == rep->master_id)	{	/* Case 2. */
+			/*
+			 * When we receive log messages in the SYNC_PAGE stage
+			 * and we decide to rerequest, it often means the pages
+			 * we expect have been dropped.  Send a rerequest with
+			 * gapflags for better performance.
+			 */
+			if ((rectype == REP_LOG || rectype == REP_BULK_LOG ||
+			    rectype == REP_LOG_MORE) &&
+			    rep->sync_state == SYNC_PAGE)
+				ret = __rep_resend_req(env, 1);
+			else
+				ret = __rep_resend_req(env, 0);
+		} else if (F_ISSET(rep, REP_F_CLIENT))	/* Case 3. */
 			(void)__rep_send_message(env,
 			    eid, REP_REREQUEST, NULL, NULL, 0, 0);
 	}
@@ -2421,7 +2659,6 @@ __rep_check_missing(env, gen, master_perm_lsn)
 	DB_LOG *dblp;
 	DB_LSN *end_lsn;
 	DB_REP *db_rep;
-	DB_THREAD_INFO *ip;
 	LOG *lp;
 	REGINFO *infop;
 	REP *rep;
@@ -2434,7 +2671,6 @@ __rep_check_missing(env, gen, master_perm_lsn)
 	infop = env->reginfo;
 	has_log_gap = has_page_gap = ret = 0;
 
-	ENV_ENTER(env, ip);
 	MUTEX_LOCK(env, rep->mtx_clientdb);
 	REP_SYSTEM_LOCK(env);
 	/*
@@ -2518,8 +2754,7 @@ __rep_check_missing(env, gen, master_perm_lsn)
 	rep->msg_th--;
 	REP_SYSTEM_UNLOCK(env);
 
-out:	ENV_LEAVE(env, ip);
-	return (ret);
+out:	return (ret);
 }
 
 static int
diff --git a/src/rep/rep_region.c b/src/rep/rep_region.c
index f1d69dff..72372bff 100644
--- a/src/rep/rep_region.c
+++ b/src/rep/rep_region.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -14,6 +14,8 @@
 
 static int __rep_egen_init  __P((ENV *, REP *));
 static int __rep_gen_init  __P((ENV *, REP *));
+static int __rep_view_init  __P((ENV *, REP *));
+static int __rep_viewfile_exists  __P((ENV *, int *));
 
 /*
  * __rep_open --
@@ -29,7 +31,7 @@ __rep_open(env)
 	REGENV *renv;
 	REGINFO *infop;
 	REP *rep;
-	int i, ret;
+	int i, ret, view;
 	char *p;
 	char fname[sizeof(REP_DIAGNAME) + 3];
 
@@ -37,10 +39,15 @@ __rep_open(env)
 	infop = env->reginfo;
 	renv = infop->primary;
 	ret = 0;
+	view = 0;
 	DB_ASSERT(env, DBREP_DIAG_FILES < 100);
 
 	if (renv->rep_off == INVALID_ROFF) {
-		/* Must create the region. */
+		/*
+		 * Must create the region. This environment either is being
+		 * created for the first time or has just had its regions
+		 * cleared by a recovery.
+		 */
 		if ((ret = __env_alloc(infop, sizeof(REP), &rep)) != 0)
 			return (ret);
 		memset(rep, 0, sizeof(*rep));
@@ -108,6 +115,23 @@ __rep_open(env)
 			return (ret);
 		if ((ret = __rep_egen_init(env, rep)) != 0)
 			return (ret);
+		/*
+		 * Determine if this is a view site or not.  It is a view
+		 * if the callback is set.  If the site was a view in the
+		 * past, we mark it as a view, but will check consistency
+		 * later when starting replication.
+		 */
+		if (db_rep->partial != NULL) {
+			rep->stat.st_view = 1;
+			if ((ret = __rep_view_init(env, rep)) != 0)
+				return (ret);
+		} else {
+			if ((ret = __rep_viewfile_exists(env, &view)) != 0)
+				return (ret);
+			if (view)
+				rep->stat.st_view = 1;
+		}
+
 		rep->gbytes = db_rep->gbytes;
 		rep->bytes = db_rep->bytes;
 		rep->request_gap = db_rep->request_gap;
@@ -157,6 +181,32 @@ __rep_open(env)
 			    "process joining the environment"));
 			return (EINVAL);
 		}
+		/*
+		 * If we are joining an existing environment and we
+		 * have a view callback set, then the environment must
+		 * already be a view.  If not, error.
+		 *
+		 * The other mismatch is not an error here (no callback
+		 * set, but environment is a view) because we may be a
+		 * rep unaware process such as db_stat and that is allowed
+		 * to proceed.  There is additional checking in other rep
+		 * functions like rep_start to confirm consistency before
+		 * using replication.
+		 */
+		if (db_rep->partial != NULL) {
+			if ((ret = __rep_viewfile_exists(env, &view)) != 0)
+				return (ret);
+			/*
+			 * If there is a callback, and we are not in-memory,
+			 * there better be a view system file too.
+			 */
+			if (view == 0 && !FLD_ISSET(rep->config, REP_C_INMEM)) {
+				__db_errx(env, DB_STR("3688",
+				    "Application environment and view mismatch "
+				    "joining the environment"));
+				return (EINVAL);
+			}
+		}
 #ifdef HAVE_REPLICATION_THREADS
 		if ((ret = __repmgr_join(env, rep)) != 0)
 			return (ret);
@@ -506,9 +556,8 @@ __rep_write_egen(env, rep, egen)
 	 * If running in-memory replication, return without any file
 	 * operations.
 	 */
-	if (FLD_ISSET(rep->config, REP_C_INMEM)) {
+	if (FLD_ISSET(rep->config, REP_C_INMEM))
 		return (0);
-	}
 
 	if ((ret = __db_appname(env,
 	    DB_APP_META, REP_EGENNAME, NULL, &p)) != 0)
@@ -591,9 +640,8 @@ __rep_write_gen(env, rep, gen)
 	 * If running in-memory replication, return without any file
 	 * operations.
 	 */
-	if (FLD_ISSET(rep->config, REP_C_INMEM)) {
+	if (FLD_ISSET(rep->config, REP_C_INMEM))
 		return (0);
-	}
 
 	if ((ret = __db_appname(env,
 	    DB_APP_META, REP_GENNAME, NULL, &p)) != 0)
@@ -608,3 +656,105 @@ __rep_write_gen(env, rep, gen)
 	__os_free(env, p);
 	return (ret);
 }
+
+/*
+ * __rep_view_init --
+ *	Initialize the permanent view file to know this site is a view
+ *	forever.  The existence of the file is the record; it is created
+ *	empty if missing and left alone if present.  Does nothing for
+ *	in-memory replication.  Returns 0 on success, or the error from
+ *	__db_appname or __os_open.
+ */
+static int
+__rep_view_init(env, rep)
+	ENV *env;
+	REP *rep;
+{
+	DB_FH *fhp;
+	int ret;
+	char *p;
+
+	/* In-memory replication performs no file operations. */
+	if (FLD_ISSET(rep->config, REP_C_INMEM))
+		return (0);
+
+	if ((ret = __db_appname(env,
+	    DB_APP_META, REPVIEW, NULL, &p)) != 0)
+		return (ret);
+
+	/*
+	 * If the file doesn't exist, create it.  We just want to open
+	 * and close the file; it has no content, and a close failure is
+	 * ignored.  If the file already exists, there is nothing to do.
+	 */
+	if (__os_exists(env, p, NULL) != 0) {
+		RPRINT(env, (env, DB_VERB_REP_MISC, "View init: Create %s", p));
+		if ((ret = __os_open(env, p, 0,
+		    DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fhp)) != 0)
+			goto out;
+		(void)__os_closehandle(env, fhp);
+	}
+out:	__os_free(env, p);
+	return (ret);
+}
+
+/*
+ * __rep_check_view --
+ *	Check consistency between the view file and the db_rep handle.
+ *	Returns 0 if they agree, EINVAL if not, else the lookup's error.
+ *
+ * PUBLIC: int __rep_check_view __P((ENV *));
+ */
+int
+__rep_check_view(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	int exist, ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	ret = 0;
+
+	/*
+	 * In-memory replication uses no view file, so the only record to
+	 * check is the region's st_view field from a previous open.
+	 */
+	if (FLD_ISSET(rep->config, REP_C_INMEM))
+		exist = (int)rep->stat.st_view;
+	else if ((ret = __rep_viewfile_exists(env, &exist)) != 0)
+		return (ret);
+
+	RPRINT(env, (env, DB_VERB_REP_MISC, "Check view.  Exist %d, cb %d",
+	    exist, (db_rep->partial != NULL)));
+	/*
+	 * If view file exists, a partial function must be set.
+	 * If view file does not exist, a partial function must not be set.
+	 */
+	if ((exist == 0 && db_rep->partial != NULL) ||
+	    (exist == 1 && db_rep->partial == NULL))
+		ret = EINVAL;
+	return (ret);
+}
+
+static int
+__rep_viewfile_exists(env, existp)
+	ENV *env;
+	int *existp;		/* Out: 1 if the view file exists, else 0. */
+{
+	char *p;
+	int ret;
+
+	*existp = 0;
+	if ((ret = __db_appname(env,
+	    DB_APP_META, REPVIEW, NULL, &p)) != 0)
+		return (ret);
+
+	if (__os_exists(env, p, NULL) == 0)
+		*existp = 1;
+
+	__os_free(env, p);
+	return (ret);		/* Still 0: __db_appname succeeded. */
+
+}
diff --git a/src/rep/rep_stat.c b/src/rep/rep_stat.c
index addfee25..ffb9f262 100644
--- a/src/rep/rep_stat.c
+++ b/src/rep/rep_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -73,6 +73,13 @@ static const char *__rep_syncstate_to_string __P((repsync_t));
 	}								\
 } while (0)
 
+#define	PRINT_VIEW(sp) do {						\
+	if ((sp)->st_view != 0)						\
+		__db_msg(env, "Environment configured as view site");	\
+	else								\
+		__db_msg(env, "Environment not configured as view site");\
+} while (0)	/* Uses 'env' from the expansion site, not a parameter. */
+
 /*
  * __rep_stat_pp --
  *	ENV->rep_stat pre/post processing.
@@ -120,7 +127,7 @@ __rep_stat(env, statp, flags)
 	DB_REP_STAT *stats;
 	LOG *lp;
 	REP *rep;
-	u_int32_t startupdone;
+	u_int32_t startupdone, view;
 	uintmax_t queued;
 	int dolock, ret;
 
@@ -177,10 +184,12 @@ __rep_stat(env, statp, flags)
 	if (LF_ISSET(DB_STAT_CLEAR)) {
 		queued = rep->stat.st_log_queued;
 		startupdone = rep->stat.st_startup_complete;
+		view = rep->stat.st_view;
 		memset(&rep->stat, 0, sizeof(rep->stat));
 		rep->stat.st_log_queued = rep->stat.st_log_queued_total =
 		    rep->stat.st_log_queued_max = queued;
 		rep->stat.st_startup_complete = startupdone;
+		rep->stat.st_view = view;
 	}
 
 	/*
@@ -377,6 +386,7 @@ __rep_print_stats(env, flags)
 	__db_dl(env, "Number of page records missed and requested",
 	    (u_long)sp->st_pg_requested);
 	PRINT_STARTUPCOMPLETE(sp);
+	PRINT_VIEW(sp);
 	__db_dl(env,
 	    "Number of transactions applied", (u_long)sp->st_txns_applied);
 
@@ -462,16 +472,20 @@ __rep_print_all(env, flags)
 	u_int32_t flags;
 {
 	static const FN rep_cfn[] = {
-		{ REP_C_2SITE_STRICT,	"REP_C_2SITE_STRICT" },
-		{ REP_C_AUTOINIT,	"REP_C_AUTOINIT" },
-		{ REP_C_AUTOROLLBACK,	"REP_C_AUTOROLLBACK" },
-		{ REP_C_BULK,		"REP_C_BULK" },
-		{ REP_C_DELAYCLIENT,	"REP_C_DELAYCLIENT" },
-		{ REP_C_ELECTIONS,	"REP_C_ELECTIONS" },
-		{ REP_C_INMEM,		"REP_C_INMEM" },
-		{ REP_C_LEASE,		"REP_C_LEASE" },
-		{ REP_C_NOWAIT,		"REP_C_NOWAIT" },
-		{ 0,			NULL }
+		{ REP_C_2SITE_STRICT,		"REP_C_2SITE_STRICT" },
+		{ REP_C_AUTOINIT,		"REP_C_AUTOINIT" },
+		{ REP_C_AUTOROLLBACK,		"REP_C_AUTOROLLBACK" },
+		{ REP_C_AUTOTAKEOVER,		"REP_C_AUTOTAKEOVER" },
+		{ REP_C_BULK,			"REP_C_BULK" },
+		{ REP_C_DELAYCLIENT,		"REP_C_DELAYCLIENT" },
+		{ REP_C_ELECT_LOGLENGTH,	"REP_C_ELECT_LOGLENGTH" },
+		{ REP_C_ELECTIONS,		"REP_C_ELECTIONS" },
+		{ REP_C_INMEM,			"REP_C_INMEM" },
+		{ REP_C_LEASE,			"REP_C_LEASE" },
+		{ REP_C_NOWAIT,			"REP_C_NOWAIT" },
+		{ REP_C_PREFMAS_CLIENT,		"REP_C_PREFMAS_CLIENT" },
+		{ REP_C_PREFMAS_MASTER,		"REP_C_PREFMAS_MASTER" },
+		{ 0,				NULL }
 	};
 	static const FN rep_efn[] = {
 		{ REP_E_PHASE0,		"REP_E_PHASE0" },
@@ -481,19 +495,21 @@ __rep_print_all(env, flags)
 		{ 0,			NULL }
 	};
 	static const FN rep_fn[] = {
-		{ REP_F_ABBREVIATED,	"REP_F_ABBREVIATED" },
-		{ REP_F_APP_BASEAPI,	"REP_F_APP_BASEAPI" },
-		{ REP_F_APP_REPMGR,	"REP_F_APP_REPMGR" },
-		{ REP_F_CLIENT,		"REP_F_CLIENT" },
-		{ REP_F_DELAY,		"REP_F_DELAY" },
-		{ REP_F_GROUP_ESTD,	"REP_F_GROUP_ESTD" },
-		{ REP_F_LEASE_EXPIRED,	"REP_F_LEASE_EXPIRED" },
-		{ REP_F_MASTER,		"REP_F_MASTER" },
-		{ REP_F_MASTERELECT,	"REP_F_MASTERELECT" },
-		{ REP_F_NEWFILE,	"REP_F_NEWFILE" },
-		{ REP_F_NIMDBS_LOADED,	"REP_F_NIMDBS_LOADED" },
-		{ REP_F_SKIPPED_APPLY,	"REP_F_SKIPPED_APPLY" },
-		{ REP_F_START_CALLED,	"REP_F_START_CALLED" },
+		{ REP_F_ABBREVIATED,		"REP_F_ABBREVIATED" },
+		{ REP_F_APP_BASEAPI,		"REP_F_APP_BASEAPI" },
+		{ REP_F_APP_REPMGR,		"REP_F_APP_REPMGR" },
+		{ REP_F_CLIENT,			"REP_F_CLIENT" },
+		{ REP_F_DELAY,			"REP_F_DELAY" },
+		{ REP_F_GROUP_ESTD,		"REP_F_GROUP_ESTD" },
+		{ REP_F_HOLD_GEN,		"REP_F_HOLD_GEN" },
+		{ REP_F_LEASE_EXPIRED,		"REP_F_LEASE_EXPIRED" },
+		{ REP_F_MASTER,			"REP_F_MASTER" },
+		{ REP_F_MASTERELECT,		"REP_F_MASTERELECT" },
+		{ REP_F_NEWFILE,		"REP_F_NEWFILE" },
+		{ REP_F_NIMDBS_LOADED,		"REP_F_NIMDBS_LOADED" },
+		{ REP_F_READONLY_MASTER,	"REP_F_READONLY_MASTER" },
+		{ REP_F_SKIPPED_APPLY,		"REP_F_SKIPPED_APPLY" },
+		{ REP_F_START_CALLED,		"REP_F_START_CALLED" },
 		{ 0,			NULL }
 	};
 	static const FN rep_lfn[] = {
@@ -523,15 +539,16 @@ __rep_print_all(env, flags)
 	rep = db_rep->region;
 	infop = env->reginfo;
 	renv = infop->primary;
-	ENV_ENTER(env, ip);
 
 	__db_msg(env, "%s", DB_GLOBAL(db_line));
 	__db_msg(env, "DB_REP handle information:");
 
 	if (db_rep->rep_db == NULL)
 		STAT_ISSET("Bookkeeping database", db_rep->rep_db);
-	else
+	else {
+		ENV_GET_THREAD_INFO(env, ip);
 		(void)__db_stat_print(db_rep->rep_db, ip, flags);
+	}
 
 	__db_prflags(env, NULL, db_rep->flags, dbrep_fn, NULL, "\tFlags");
 
@@ -604,7 +621,6 @@ __rep_print_all(env, flags)
 	STAT_LONG("Maximum lease timestamp microseconds",
 	    lp->max_lease_ts.tv_nsec / NS_PER_US);
 	MUTEX_UNLOCK(env, rep->mtx_clientdb);
-	ENV_LEAVE(env, ip);
 
 	return (0);
 }
@@ -648,8 +664,10 @@ __rep_stat_summary_print(env)
 	ret = 0;
 	if ((ret = __rep_stat(env, &sp, 0)) == 0) {
 		PRINT_STATUS(sp, is_client);
-		if (is_client)
+		if (is_client) {
 			PRINT_STARTUPCOMPLETE(sp);
+			PRINT_VIEW(sp);
+		}
 		PRINT_MAXPERMLSN(sp);
 		/*
 		 * Use the number of sites that is kept up-to-date most
diff --git a/src/rep/rep_stub.c b/src/rep/rep_stub.c
index 2d96ea59..51c79eb0 100644
--- a/src/rep/rep_stub.c
+++ b/src/rep/rep_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -130,7 +130,7 @@ __rep_elect_pp(dbenv, nsites, nvotes, flags)
 }
 
 int
-__rep_flush(dbenv)
+__rep_flush_pp(dbenv)
 	DB_ENV *dbenv;
 {
 	return (__db_norep(dbenv->env));
@@ -201,7 +201,7 @@ __rep_get_nsites(dbenv, n)
 }
 
 int
-__rep_set_priority(dbenv, priority)
+__rep_set_priority_pp(dbenv, priority)
 	DB_ENV *dbenv;
 	u_int32_t priority;
 {
@@ -219,7 +219,7 @@ __rep_get_priority(dbenv, priority)
 }
 
 int
-__rep_set_timeout(dbenv, which, timeout)
+__rep_set_timeout_pp(dbenv, which, timeout)
 	DB_ENV *dbenv;
 	int which;
 	db_timeout_t timeout;
@@ -342,6 +342,16 @@ __rep_set_transport_pp(dbenv, eid, f_send)
 }
 
 int
+__rep_set_view(dbenv, f_partial)
+	DB_ENV *dbenv;
+	int (*f_partial) __P((DB_ENV *,
+	    const char *, int *, u_int32_t));
+{
+	COMPQUIET(f_partial, NULL);	/* Callback unused in this stub. */
+	return (__db_norep(dbenv->env));
+}
+
+int
 __rep_set_request(dbenv, min, max)
 	DB_ENV *dbenv;
 	u_int32_t min, max;
diff --git a/src/rep/rep_util.c b/src/rep/rep_util.c
index 0dfe6122..5ee2592f 100644
--- a/src/rep/rep_util.c
+++ b/src/rep/rep_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -11,6 +11,7 @@
 #include "db_int.h"
 #include "dbinc/db_page.h"
 #include "dbinc/db_am.h"
+#include "dbinc/fop.h"
 #include "dbinc/mp.h"
 #include "dbinc/txn.h"
 
@@ -437,7 +438,7 @@ __rep_send_message(env, eid, rtype, lsnp, dbt, ctlflags, repflags)
 	    FLD_ISSET(ctlflags, REPCTL_LEASE | REPCTL_PERM)) {
 		F_SET(&cntrl, REPCTL_LEASE);
 		DB_ASSERT(env, rep->version == DB_REPVERSION);
-		__os_gettime(env, &msg_time, 1);
+		__os_gettime(env, &msg_time, 0);
 		cntrl.msg_sec = (u_int32_t)msg_time.tv_sec;
 		cntrl.msg_nsec = (u_int32_t)msg_time.tv_nsec;
 	}
@@ -591,6 +592,15 @@ __rep_new_master(env, cntrl, eid)
 	ret = 0;
 	logc = NULL;
 	lockout_msg = 0;
+
+	/*
+	 * If REP_F_HOLD_GEN is set, we want to keep this site at its
+	 * current gen.  Do not process an incoming NEWMASTER, which
+	 * would change the gen.
+	 */
+	if (F_ISSET(rep, REP_F_HOLD_GEN))
+		return (ret);
+
 	REP_SYSTEM_LOCK(env);
 	change = rep->gen != cntrl->gen || rep->master_id != eid;
 	/*
@@ -1128,6 +1138,8 @@ __env_db_rep_exit(env)
 	rep = db_rep->region;
 
 	REP_SYSTEM_LOCK(env);
+	/* If we have a reference, it better not already be 0. */
+	DB_ASSERT(env, rep->handle_cnt != 0);
 	rep->handle_cnt--;
 	REP_SYSTEM_UNLOCK(env);
 
@@ -1190,7 +1202,7 @@ __db_rep_enter(dbp, checkgen, checklock, return_now)
 	 * get an exclusive lock on this database.
 	 */
 	if (checkgen && dbp->mpf->mfp && IS_REP_CLIENT(env)) {
-		if (dbp->mpf->mfp->excl_lockout) 
+		if (dbp->mpf->mfp->excl_lockout)
 			return (DB_REP_HANDLE_DEAD);
 	}
 
@@ -1328,7 +1340,8 @@ __op_rep_exit(env)
 	rep = db_rep->region;
 
 	REP_SYSTEM_LOCK(env);
-	DB_ASSERT(env, rep->op_cnt > 0);
+	/* If we have a reference, it better not already be 0. */
+	DB_ASSERT(env, rep->op_cnt != 0);
 	rep->op_cnt--;
 	REP_SYSTEM_UNLOCK(env);
 
@@ -1697,7 +1710,9 @@ __rep_msg_to_old(version, rectype)
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
-	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID },
 	/*
 	 * 4.2/DB_REPVERSION 1 no longer supported.
 	 */
@@ -1708,7 +1723,9 @@ __rep_msg_to_old(version, rectype)
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
-	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID },
 	/*
 	 * 4.3/DB_REPVERSION 2 no longer supported.
 	 */
@@ -1719,7 +1736,9 @@ __rep_msg_to_old(version, rectype)
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
-	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID },
 	/*
 	 * From 4.7 message number To 4.4/4.5 message number
 	 */
@@ -1727,6 +1746,11 @@ __rep_msg_to_old(version, rectype)
 	    1,			/* REP_ALIVE */
 	    2,			/* REP_ALIVE_REQ */
 	    3,			/* REP_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_CHUNK */
+	    REP_INVALID,	/* REP_BLOB_CHUNK_REQ */
+	    REP_INVALID,	/* REP_BLOB_UPDATE */
+	    REP_INVALID,	/* REP_BLOB_UPDATE_REQ */
 	    4,			/* REP_BULK_LOG */
 	    5,			/* REP_BULK_PAGE */
 	    6,			/* REP_DUPMASTER */
@@ -1765,6 +1789,11 @@ __rep_msg_to_old(version, rectype)
 	    1,			/* REP_ALIVE */
 	    2,			/* REP_ALIVE_REQ */
 	    3,			/* REP_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_CHUNK */
+	    REP_INVALID,	/* REP_BLOB_CHUNK_REQ */
+	    REP_INVALID,	/* REP_BLOB_UPDATE */
+	    REP_INVALID,	/* REP_BLOB_UPDATE_REQ */
 	    4,			/* REP_BULK_LOG */
 	    5,			/* REP_BULK_PAGE */
 	    6,			/* REP_DUPMASTER */
@@ -1803,6 +1832,11 @@ __rep_msg_to_old(version, rectype)
 	    1,			/* REP_ALIVE */
 	    2,			/* REP_ALIVE_REQ */
 	    3,			/* REP_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_CHUNK */
+	    REP_INVALID,	/* REP_BLOB_CHUNK_REQ */
+	    REP_INVALID,	/* REP_BLOB_UPDATE */
+	    REP_INVALID,	/* REP_BLOB_UPDATE_REQ */
 	    4,			/* REP_BULK_LOG */
 	    5,			/* REP_BULK_PAGE */
 	    6,			/* REP_DUPMASTER */
@@ -1841,6 +1875,53 @@ __rep_msg_to_old(version, rectype)
 	    1,			/* REP_ALIVE */
 	    2,			/* REP_ALIVE_REQ */
 	    3,			/* REP_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_CHUNK */
+	    REP_INVALID,	/* REP_BLOB_CHUNK_REQ */
+	    REP_INVALID,	/* REP_BLOB_UPDATE */
+	    REP_INVALID,	/* REP_BLOB_UPDATE_REQ */
+	    4,			/* REP_BULK_LOG */
+	    5,			/* REP_BULK_PAGE */
+	    6,			/* REP_DUPMASTER */
+	    7,			/* REP_FILE */
+	    8,			/* REP_FILE_FAIL */
+	    9,			/* REP_FILE_REQ */
+	    10,			/* REP_LEASE_GRANT */
+	    11,			/* REP_LOG */
+	    12,			/* REP_LOG_MORE */
+	    13,			/* REP_LOG_REQ */
+	    14,			/* REP_MASTER_REQ */
+	    15,			/* REP_NEWCLIENT */
+	    16,			/* REP_NEWFILE */
+	    17,			/* REP_NEWMASTER */
+	    18,			/* REP_NEWSITE */
+	    19,			/* REP_PAGE */
+	    20,			/* REP_PAGE_FAIL */
+	    21,			/* REP_PAGE_MORE */
+	    22,			/* REP_PAGE_REQ */
+	    23,			/* REP_REREQUEST */
+	    24,			/* REP_START_SYNC */
+	    25,			/* REP_UPDATE */
+	    26,			/* REP_UPDATE_REQ */
+	    27,			/* REP_VERIFY */
+	    28,			/* REP_VERIFY_FAIL */
+	    29,			/* REP_VERIFY_REQ */
+	    30,			/* REP_VOTE1 */
+	    31			/* REP_VOTE2 */
+	},
+	/*
+	 * From 6.1 message number To 5.3 message number.  Messages
+	 * handling BLOBs were added.
+	 */
+	{   REP_INVALID,	/* NO message 0 */
+	    1,			/* REP_ALIVE */
+	    2,			/* REP_ALIVE_REQ */
+	    3,			/* REP_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_ALL_REQ */
+	    REP_INVALID,	/* REP_BLOB_CHUNK */
+	    REP_INVALID,	/* REP_BLOB_CHUNK_REQ */
+	    REP_INVALID,	/* REP_BLOB_UPDATE */
+	    REP_INVALID,	/* REP_BLOB_UPDATE_REQ */
 	    4,			/* REP_BULK_LOG */
 	    5,			/* REP_BULK_PAGE */
 	    6,			/* REP_DUPMASTER */
@@ -1901,7 +1982,9 @@ __rep_msg_from_old(version, rectype)
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
-	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID },
 	/*
 	 * 4.2/DB_REPVERSION 1 no longer supported.
 	 */
@@ -1912,7 +1995,9 @@ __rep_msg_from_old(version, rectype)
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
-	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID },
 	/*
 	 * 4.3/DB_REPVERSION 2 no longer supported.
 	 */
@@ -1923,7 +2008,9 @@ __rep_msg_from_old(version, rectype)
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
 	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
-	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
+	    REP_INVALID },
 	/*
 	 * From 4.4/4.5 message number To 4.7 message number
 	 */
@@ -1931,36 +2018,41 @@ __rep_msg_from_old(version, rectype)
 	    1,			/* 1, REP_ALIVE */
 	    2,			/* 2, REP_ALIVE_REQ */
 	    3,			/* 3, REP_ALL_REQ */
-	    4,			/* 4, REP_BULK_LOG */
-	    5,			/* 5, REP_BULK_PAGE */
-	    6,			/* 6, REP_DUPMASTER */
-	    7,			/* 7, REP_FILE */
-	    8,			/* 8, REP_FILE_FAIL */
-	    9,			/* 9, REP_FILE_REQ */
-	    /* 10, REP_LEASE_GRANT doesn't exist */
-	    11,			/* 10, REP_LOG */
-	    12,			/* 11, REP_LOG_MORE */
-	    13,			/* 12, REP_LOG_REQ */
-	    14,			/* 13, REP_MASTER_REQ */
-	    15,			/* 14, REP_NEWCLIENT */
-	    16,			/* 15, REP_NEWFILE */
-	    17,			/* 16, REP_NEWMASTER */
-	    18,			/* 17, REP_NEWSITE */
-	    19,			/* 18, REP_PAGE */
-	    20,			/* 19, REP_PAGE_FAIL */
-	    21,			/* 20, REP_PAGE_MORE */
-	    22,			/* 21, REP_PAGE_REQ */
-	    23,			/* 22, REP_REREQUEST */
-	    /* 24, REP_START_SYNC doesn't exist */
-	    25,			/* 23, REP_UPDATE */
-	    26,			/* 24, REP_UPDATE_REQ */
-	    27,			/* 25, REP_VERIFY */
-	    28,			/* 26, REP_VERIFY_FAIL */
-	    29,			/* 27, REP_VERIFY_REQ */
-	    30,			/* 28, REP_VOTE1 */
-	    31,			/* 29, REP_VOTE2 */
+	    9,			/* 4, REP_BULK_LOG */
+	    10,			/* 5, REP_BULK_PAGE */
+	    11,			/* 6, REP_DUPMASTER */
+	    12,			/* 7, REP_FILE */
+	    13,			/* 8, REP_FILE_FAIL */
+	    14,			/* 9, REP_FILE_REQ */
+	    /* 15, REP_LEASE_GRANT doesn't exist */
+	    16,			/* 10, REP_LOG */
+	    17,			/* 11, REP_LOG_MORE */
+	    18,			/* 12, REP_LOG_REQ */
+	    19,			/* 13, REP_MASTER_REQ */
+	    20,			/* 14, REP_NEWCLIENT */
+	    21,			/* 15, REP_NEWFILE */
+	    22,			/* 16, REP_NEWMASTER */
+	    23,			/* 17, REP_NEWSITE */
+	    24,			/* 18, REP_PAGE */
+	    25,			/* 19, REP_PAGE_FAIL */
+	    26,			/* 20, REP_PAGE_MORE */
+	    27,			/* 21, REP_PAGE_REQ */
+	    28,			/* 22, REP_REREQUEST */
+	    /* 29, REP_START_SYNC doesn't exist */
+	    30,			/* 23, REP_UPDATE */
+	    31,			/* 24, REP_UPDATE_REQ */
+	    32,			/* 25, REP_VERIFY */
+	    33,			/* 26, REP_VERIFY_FAIL */
+	    34,			/* 27, REP_VERIFY_REQ */
+	    35,			/* 28, REP_VOTE1 */
+	    36,			/* 29, REP_VOTE2 */
 	    REP_INVALID,	/* 30, 4.4/4.5 no message */
-	    REP_INVALID		/* 31, 4.4/4.5 no message */
+	    REP_INVALID,	/* 31, 4.4/4.5 no message */
+	    REP_INVALID,	/* 32, 4.4/4.5 no message */
+	    REP_INVALID,	/* 33, 4.4/4.5 no message */
+	    REP_INVALID,	/* 34, 4.4/4.5 no message */
+	    REP_INVALID,	/* 35, 4.4/4.5 no message */
+	    REP_INVALID		/* 36, 4.4/4.5 no message */
 	},
 	/*
 	 * From 4.6 message number To 4.7 message number.  There are
@@ -1971,34 +2063,39 @@ __rep_msg_from_old(version, rectype)
 	    1,			/* 1, REP_ALIVE */
 	    2,			/* 2, REP_ALIVE_REQ */
 	    3,			/* 3, REP_ALL_REQ */
-	    4,			/* 4, REP_BULK_LOG */
-	    5,			/* 5, REP_BULK_PAGE */
-	    6,			/* 6, REP_DUPMASTER */
-	    7,			/* 7, REP_FILE */
-	    8,			/* 8, REP_FILE_FAIL */
-	    9,			/* 9, REP_FILE_REQ */
-	    10,			/* 10, REP_LEASE_GRANT */
-	    11,			/* 11, REP_LOG */
-	    12,			/* 12, REP_LOG_MORE */
-	    13,			/* 13, REP_LOG_REQ */
-	    14,			/* 14, REP_MASTER_REQ */
-	    15,			/* 15, REP_NEWCLIENT */
-	    16,			/* 16, REP_NEWFILE */
-	    17,			/* 17, REP_NEWMASTER */
-	    18,			/* 18, REP_NEWSITE */
-	    19,			/* 19, REP_PAGE */
-	    20,			/* 20, REP_PAGE_FAIL */
-	    21,			/* 21, REP_PAGE_MORE */
-	    22,			/* 22, REP_PAGE_REQ */
-	    23,			/* 22, REP_REREQUEST */
-	    24,			/* 24, REP_START_SYNC */
-	    25,			/* 25, REP_UPDATE */
-	    26,			/* 26, REP_UPDATE_REQ */
-	    27,			/* 27, REP_VERIFY */
-	    28,			/* 28, REP_VERIFY_FAIL */
-	    29,			/* 29, REP_VERIFY_REQ */
-	    30,			/* 30, REP_VOTE1 */
-	    31			/* 31, REP_VOTE2 */
+	    9,			/* 4, REP_BULK_LOG */
+	    10,			/* 5, REP_BULK_PAGE */
+	    11,			/* 6, REP_DUPMASTER */
+	    12,			/* 7, REP_FILE */
+	    13,			/* 8, REP_FILE_FAIL */
+	    14,			/* 9, REP_FILE_REQ */
+	    15,			/* 10, REP_LEASE_GRANT */
+	    16,			/* 11, REP_LOG */
+	    17,			/* 12, REP_LOG_MORE */
+	    18,			/* 13, REP_LOG_REQ */
+	    19,			/* 14, REP_MASTER_REQ */
+	    20,			/* 15, REP_NEWCLIENT */
+	    21,			/* 16, REP_NEWFILE */
+	    22,			/* 17, REP_NEWMASTER */
+	    23,			/* 18, REP_NEWSITE */
+	    24,			/* 19, REP_PAGE */
+	    25,			/* 20, REP_PAGE_FAIL */
+	    26,			/* 21, REP_PAGE_MORE */
+	    27,			/* 22, REP_PAGE_REQ */
+	    28,			/* 23, REP_REREQUEST */
+	    29,			/* 24, REP_START_SYNC */
+	    30,			/* 25, REP_UPDATE */
+	    31,			/* 26, REP_UPDATE_REQ */
+	    32,			/* 27, REP_VERIFY */
+	    33,			/* 28, REP_VERIFY_FAIL */
+	    34,			/* 29, REP_VERIFY_REQ */
+	    35,			/* 30, REP_VOTE1 */
+	    36,			/* 31, REP_VOTE2 */
+	    REP_INVALID,	/* 32, 4.6/4.7 no message */
+	    REP_INVALID,	/* 33, 4.6/4.7 no message */
+	    REP_INVALID,	/* 34, 4.6/4.7 no message */
+	    REP_INVALID,	/* 35, 4.6/4.7 no message */
+	    REP_INVALID		/* 36, 4.6/4.7 no message */
 	},
 	/*
 	 * From 4.7 message number To 5.2 message number.  There are
@@ -2009,34 +2106,39 @@ __rep_msg_from_old(version, rectype)
 	    1,			/* 1, REP_ALIVE */
 	    2,			/* 2, REP_ALIVE_REQ */
 	    3,			/* 3, REP_ALL_REQ */
-	    4,			/* 4, REP_BULK_LOG */
-	    5,			/* 5, REP_BULK_PAGE */
-	    6,			/* 6, REP_DUPMASTER */
-	    7,			/* 7, REP_FILE */
-	    8,			/* 8, REP_FILE_FAIL */
-	    9,			/* 9, REP_FILE_REQ */
-	    10,			/* 10, REP_LEASE_GRANT */
-	    11,			/* 11, REP_LOG */
-	    12,			/* 12, REP_LOG_MORE */
-	    13,			/* 13, REP_LOG_REQ */
-	    14,			/* 14, REP_MASTER_REQ */
-	    15,			/* 15, REP_NEWCLIENT */
-	    16,			/* 16, REP_NEWFILE */
-	    17,			/* 17, REP_NEWMASTER */
-	    18,			/* 18, REP_NEWSITE */
-	    19,			/* 19, REP_PAGE */
-	    20,			/* 20, REP_PAGE_FAIL */
-	    21,			/* 21, REP_PAGE_MORE */
-	    22,			/* 22, REP_PAGE_REQ */
-	    23,			/* 22, REP_REREQUEST */
-	    24,			/* 24, REP_START_SYNC */
-	    25,			/* 25, REP_UPDATE */
-	    26,			/* 26, REP_UPDATE_REQ */
-	    27,			/* 27, REP_VERIFY */
-	    28,			/* 28, REP_VERIFY_FAIL */
-	    29,			/* 29, REP_VERIFY_REQ */
-	    30,			/* 30, REP_VOTE1 */
-	    31			/* 31, REP_VOTE2 */
+	    9,			/* 4, REP_BULK_LOG */
+	    10,			/* 5, REP_BULK_PAGE */
+	    11,			/* 6, REP_DUPMASTER */
+	    12,			/* 7, REP_FILE */
+	    13,			/* 8, REP_FILE_FAIL */
+	    14,			/* 9, REP_FILE_REQ */
+	    15,			/* 10, REP_LEASE_GRANT */
+	    16,			/* 11, REP_LOG */
+	    17,			/* 12, REP_LOG_MORE */
+	    18,			/* 13, REP_LOG_REQ */
+	    19,			/* 14, REP_MASTER_REQ */
+	    20,			/* 15, REP_NEWCLIENT */
+	    21,			/* 16, REP_NEWFILE */
+	    22,			/* 17, REP_NEWMASTER */
+	    23,			/* 18, REP_NEWSITE */
+	    24,			/* 19, REP_PAGE */
+	    25,			/* 20, REP_PAGE_FAIL */
+	    26,			/* 21, REP_PAGE_MORE */
+	    27,			/* 22, REP_PAGE_REQ */
+	    28,			/* 23, REP_REREQUEST */
+	    29,			/* 24, REP_START_SYNC */
+	    30,			/* 25, REP_UPDATE */
+	    31,			/* 26, REP_UPDATE_REQ */
+	    32,			/* 27, REP_VERIFY */
+	    33,			/* 28, REP_VERIFY_FAIL */
+	    34,			/* 29, REP_VERIFY_REQ */
+	    35,			/* 30, REP_VOTE1 */
+	    36,			/* 31, REP_VOTE2 */
+	    REP_INVALID,	/* 32, 4.7/5.2 no message */
+	    REP_INVALID,	/* 33, 4.7/5.2 no message */
+	    REP_INVALID,	/* 34, 4.7/5.2 no message */
+	    REP_INVALID,	/* 35, 4.7/5.2 no message */
+	    REP_INVALID		/* 36, 4.7/5.2 no message */
 	},
 	/*
 	 * From 4.7 message number To 5.3 message number.  There are
@@ -2047,34 +2149,86 @@ __rep_msg_from_old(version, rectype)
 	    1,			/* 1, REP_ALIVE */
 	    2,			/* 2, REP_ALIVE_REQ */
 	    3,			/* 3, REP_ALL_REQ */
-	    4,			/* 4, REP_BULK_LOG */
-	    5,			/* 5, REP_BULK_PAGE */
-	    6,			/* 6, REP_DUPMASTER */
-	    7,			/* 7, REP_FILE */
-	    8,			/* 8, REP_FILE_FAIL */
-	    9,			/* 9, REP_FILE_REQ */
-	    10,			/* 10, REP_LEASE_GRANT */
-	    11,			/* 11, REP_LOG */
-	    12,			/* 12, REP_LOG_MORE */
-	    13,			/* 13, REP_LOG_REQ */
-	    14,			/* 14, REP_MASTER_REQ */
-	    15,			/* 15, REP_NEWCLIENT */
-	    16,			/* 16, REP_NEWFILE */
-	    17,			/* 17, REP_NEWMASTER */
-	    18,			/* 18, REP_NEWSITE */
-	    19,			/* 19, REP_PAGE */
-	    20,			/* 20, REP_PAGE_FAIL */
-	    21,			/* 21, REP_PAGE_MORE */
-	    22,			/* 22, REP_PAGE_REQ */
-	    23,			/* 22, REP_REREQUEST */
-	    24,			/* 24, REP_START_SYNC */
-	    25,			/* 25, REP_UPDATE */
-	    26,			/* 26, REP_UPDATE_REQ */
-	    27,			/* 27, REP_VERIFY */
-	    28,			/* 28, REP_VERIFY_FAIL */
-	    29,			/* 29, REP_VERIFY_REQ */
-	    30,			/* 30, REP_VOTE1 */
-	    31			/* 31, REP_VOTE2 */
+	    9,			/* 4, REP_BULK_LOG */
+	    10,			/* 5, REP_BULK_PAGE */
+	    11,			/* 6, REP_DUPMASTER */
+	    12,			/* 7, REP_FILE */
+	    13,			/* 8, REP_FILE_FAIL */
+	    14,			/* 9, REP_FILE_REQ */
+	    15,			/* 10, REP_LEASE_GRANT */
+	    16,			/* 11, REP_LOG */
+	    17,			/* 12, REP_LOG_MORE */
+	    18,			/* 13, REP_LOG_REQ */
+	    19,			/* 14, REP_MASTER_REQ */
+	    20,			/* 15, REP_NEWCLIENT */
+	    21,			/* 16, REP_NEWFILE */
+	    22,			/* 17, REP_NEWMASTER */
+	    23,			/* 18, REP_NEWSITE */
+	    24,			/* 19, REP_PAGE */
+	    25,			/* 20, REP_PAGE_FAIL */
+	    26,			/* 21, REP_PAGE_MORE */
+	    27,			/* 22, REP_PAGE_REQ */
+	    28,			/* 23, REP_REREQUEST */
+	    29,			/* 24, REP_START_SYNC */
+	    30,			/* 25, REP_UPDATE */
+	    31,			/* 26, REP_UPDATE_REQ */
+	    32,			/* 27, REP_VERIFY */
+	    33,			/* 28, REP_VERIFY_FAIL */
+	    34,			/* 29, REP_VERIFY_REQ */
+	    35,			/* 30, REP_VOTE1 */
+	    36,			/* 31, REP_VOTE2 */
+	    REP_INVALID,	/* 32, 4.7/5.3 no message */
+	    REP_INVALID,	/* 33, 4.7/5.3 no message */
+	    REP_INVALID,	/* 34, 4.7/5.3 no message */
+	    REP_INVALID,	/* 35, 4.7/5.3 no message */
+	    REP_INVALID		/* 36, 4.7/5.3 no message */
+	},
+	/*
+	 * From 5.3 message number To 6.1 message number.  Messages to
+	 * handle BLOBs were added.
+	 */
+	{   REP_INVALID,	/* NO message 0 */
+	    1,			/* 1, REP_ALIVE */
+	    2,			/* 2, REP_ALIVE_REQ */
+	    3,			/* 3, REP_ALL_REQ */
+	    /* 4, REP_BLOB_ALL_REQ doesn't exist */
+	    /* 5, REP_BLOB_CHUNK doesn't exist */
+	    /* 6, REP_BLOB_CHUNK_REQ doesn't exist */
+	    /* 7, REP_BLOB_UPDATE doesn't exist */
+	    /* 8, REP_BLOB_UPDATE_REQ doesn't exist */
+	    9,			/* 4, REP_BULK_LOG */
+	    10,			/* 5, REP_BULK_PAGE */
+	    11,			/* 6, REP_DUPMASTER */
+	    12,			/* 7, REP_FILE */
+	    13,			/* 8, REP_FILE_FAIL */
+	    14,			/* 9, REP_FILE_REQ */
+	    15,			/* 10, REP_LEASE_GRANT */
+	    16,			/* 11, REP_LOG */
+	    17,			/* 12, REP_LOG_MORE */
+	    18,			/* 13, REP_LOG_REQ */
+	    19,			/* 14, REP_MASTER_REQ */
+	    20,			/* 15, REP_NEWCLIENT */
+	    21,			/* 16, REP_NEWFILE */
+	    22,			/* 17, REP_NEWMASTER */
+	    23,			/* 18, REP_NEWSITE */
+	    24,			/* 19, REP_PAGE */
+	    25,			/* 20, REP_PAGE_FAIL */
+	    26,			/* 21, REP_PAGE_MORE */
+	    27,			/* 22, REP_PAGE_REQ */
+	    28,			/* 23, REP_REREQUEST */
+	    29,			/* 24, REP_START_SYNC */
+	    30,			/* 25, REP_UPDATE */
+	    31,			/* 26, REP_UPDATE_REQ */
+	    32,			/* 27, REP_VERIFY */
+	    33,			/* 28, REP_VERIFY_FAIL */
+	    34,			/* 29, REP_VERIFY_REQ */
+	    35,			/* 30, REP_VOTE1 */
+	    36,			/* 31, REP_VOTE2 */
+	    REP_INVALID,	/* 32, 5.3/6.1 no message */
+	    REP_INVALID,	/* 33, 5.3/6.1 no message */
+	    REP_INVALID,	/* 34, 5.3/6.1 no message */
+	    REP_INVALID,	/* 35, 5.3/6.1 no message */
+	    REP_INVALID		/* 36, 5.3/6.1 no message */
 	}
 	};
 	return (table[version][rectype]);
@@ -2215,9 +2369,9 @@ __rep_print_int(env, verbose, fmt, ap)
 	__os_id(env->dbenv, &pid, &tid);
 	if (diag_msg)
 		MUTEX_LOCK(env, rep->mtx_diag);
-	__os_gettime(env, &ts, 1);
+	__os_gettime(env, &ts, 0);
 	__db_msgadd(env, &mb, "[%lu:%lu][%s] %s: ",
-	    (u_long)ts.tv_sec, (u_long)ts.tv_nsec/NS_PER_US,
+	    (u_long)ts.tv_sec, (u_long)ts.tv_nsec / NS_PER_US,
 	    env->dbenv->thread_id_string(env->dbenv, pid, tid, buf), s);
 
 	__db_msgadd_ap(env, &mb, fmt, ap);
@@ -2260,6 +2414,26 @@ __rep_print_message(env, eid, rp, str, flags)
 		FLD_SET(verbflag, DB_VERB_REP_MISC);
 		type = "all_req";
 		break;
+	case REP_BLOB_ALL_REQ:
+		FLD_SET(verbflag, DB_VERB_REP_MISC);
+		type = "all_blob_req";
+		break;
+	case REP_BLOB_CHUNK:
+		FLD_SET(verbflag, DB_VERB_REP_MISC);
+		type = "blob_chunk";
+		break;
+	case REP_BLOB_CHUNK_REQ:
+		FLD_SET(verbflag, DB_VERB_REP_MISC);
+		type = "blob_chunk_req";
+		break;
+	case REP_BLOB_UPDATE:
+		FLD_SET(verbflag, DB_VERB_REP_MISC);
+		type = "blob_update";
+		break;
+	case REP_BLOB_UPDATE_REQ:
+		FLD_SET(verbflag, DB_VERB_REP_MISC);
+		type = "blob_update_req";
+		break;
 	case REP_BULK_LOG:
 		FLD_SET(verbflag, DB_VERB_REP_MISC);
 		type = "bulk_log";
@@ -2650,9 +2824,19 @@ __rep_log_backup(env, logc, lsn, match)
 		 */
 		if ((match == REP_REC_COMMIT &&
 		    rectype == DB___txn_regop) ||
-		    (match == REP_REC_PERM &&
-		    (rectype == DB___txn_ckp || rectype == DB___txn_regop)))
+		    ((match == REP_REC_PERM || match == REP_REC_PERM_DEL) &&
+		    IS_PERM_RECTYPE(rectype)))
 			break;
+		/*
+		 * Break early if a file remove is discovered in the logs.
+		 * BDB cannot restore a deleted database or blob file from
+		 * logs, so trigger internal init to recover the file.
+		 * Used by Instant Internal Init in replication.
+		 */
+		if (match == REP_REC_PERM_DEL && rectype == DB___fop_remove) {
+			ret = DB_NOTFOUND;
+			break;
+		}
 	}
 	return (ret);
 }
@@ -2671,7 +2855,6 @@ __rep_get_maxpermlsn(env, max_perm_lsnp)
 {
 	DB_LOG *dblp;
 	DB_REP *db_rep;
-	DB_THREAD_INFO *ip;
 	LOG *lp;
 	REP *rep;
 
@@ -2680,11 +2863,9 @@ __rep_get_maxpermlsn(env, max_perm_lsnp)
 	dblp = env->lg_handle;
 	lp = dblp->reginfo.primary;
 
-	ENV_ENTER(env, ip);
 	MUTEX_LOCK(env, rep->mtx_clientdb);
 	*max_perm_lsnp = lp->max_perm_lsn;
 	MUTEX_UNLOCK(env, rep->mtx_clientdb);
-	ENV_LEAVE(env, ip);
 	return (0);
 }
 
@@ -2724,12 +2905,13 @@ __rep_get_datagen(env, data_genp)
 	u_int8_t data_buf[__REP_LSN_HIST_DATA_SIZE];
 	DBT key_dbt, data_dbt;
 	u_int32_t flags;
-	int ret, t_ret, tries;
+	int ret, t_ret, tries, was_open;
 
 	db_rep = env->rep_handle;
 	ret = 0;
 	*data_genp = 0;
 	tries = 0;
+	was_open = 0;
 	flags = DB_LAST;
 retry:
 	if ((ret = __txn_begin(env, NULL, NULL, &txn, DB_IGNORE_LEASE)) != 0)
@@ -2746,10 +2928,10 @@ retry:
 			 * That is not an error.
 			 */
 			ret = 0;
-			goto out;
+			goto noclose;
 		}
-		db_rep->lsn_db = dbp;
-	}
+	} else
+		was_open = 1;
 
 	if ((ret = __db_cursor(dbp, NULL, txn, &dbc, 0)) != 0)
 		goto out;
@@ -2784,8 +2966,126 @@ retry:
 	    &key, key_buf, __REP_LSN_HIST_KEY_SIZE, NULL)) == 0)
 		*data_genp = key.gen;
 out:
+	if (!was_open && dbp != NULL &&
+	    (t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
+		ret = t_ret;
+noclose:
 	if ((t_ret = __txn_commit(txn, DB_TXN_NOSYNC)) != 0 && ret == 0)
 		ret = t_ret;
 err:
 	return (ret);
 }
+
+/*
+ * __rep_become_readonly_master --
+ *
+ * Put this master into a state where it no longer accepts writes but it
+ * is still a master that can respond to requests for missing messages.
+ * It fills in sync_lsn to provide a mechanism to know the LSN of the
+ * next log record expected on this site.  Generally, this site should
+ * be restarted as a client shortly after becoming a readonly master.
+ *
+ * PUBLIC: int __rep_become_readonly_master
+ * PUBLIC:      __P((ENV *, u_int32_t *, DB_LSN *));
+ */
+int
+__rep_become_readonly_master(env, gen, sync_lsnp)
+	ENV *env;
+	u_int32_t *gen;
+	DB_LSN *sync_lsnp;
+{
+	DB_LOG *dblp;
+	DB_REP *db_rep;
+	LOG *lp;
+	REP *rep;
+	int locked, ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	*gen = 0;
+	ZERO_LSN(*sync_lsnp);
+	ret = 0;
+	locked = 0;
+
+	REP_SYSTEM_LOCK(env);
+	/*
+	 * Lock out replication message thread processing so that the
+	 * replication world won't change (e.g. restart, client sync).
+	 */
+	if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) {
+		/* There is already someone in msg lockout, return. */
+		RPRINT(env, (env, DB_VERB_REP_MISC,
+		    "Readonly master: thread already in msg lockout"));
+		goto errunlock;
+	} else if ((ret = __rep_lockout_msg(env, rep, 0)) != 0)
+		goto errclearlockouts;
+
+	/*
+	 * Lock out API to wait for active txn/mpool operations to complete
+	 * and prevent new ones from starting.
+	 */
+	if ((ret = __rep_lockout_api(env, rep)) != 0)
+		goto errclearlockouts;
+	locked = 1;
+
+	/* Make this site a readonly master and get master generation. */
+	F_SET(rep, REP_F_READONLY_MASTER);
+	*gen = rep->gen;
+	REP_SYSTEM_UNLOCK(env);
+
+	/* Get the next log record the logging subsystem expects to write. */
+	LOG_SYSTEM_LOCK(env);
+	*sync_lsnp = lp->lsn;
+	LOG_SYSTEM_UNLOCK(env);
+
+	REP_SYSTEM_LOCK(env);
+errclearlockouts:
+	FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG);
+	if (locked)
+		CLR_LOCKOUT_BDB(rep);
+errunlock:
+	REP_SYSTEM_UNLOCK(env);
+	return (ret);
+}
+
+/*
+ * __rep_get_lsnhist_data --
+ *
+ * A utility function to get the full LSN history database record for a
+ * particular gen.
+ *
+ * PUBLIC: int __rep_get_lsnhist_data __P((ENV *, DB_THREAD_INFO *,
+ * PUBLIC:     u_int32_t, __rep_lsn_hist_data_args *));
+ */
+int
+__rep_get_lsnhist_data(env, ip, gen, lsnhist_data)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	u_int32_t gen;
+	__rep_lsn_hist_data_args *lsnhist_data;
+{
+	DB_TXN *txn;
+	DBC *dbc;
+	struct rep_waitgoal reason;
+	int ret, t_ret;
+
+	txn = NULL;
+	dbc = NULL;
+
+	/*
+	 * Cannot use cached LSN history values because we need the
+	 * timestamp value here, which is not cached.
+	 */
+	ret = __rep_read_lsn_history(env,
+	    ip, &txn, &dbc, gen, lsnhist_data, &reason, DB_SET, 0);
+
+	if (dbc != NULL &&
+	    (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
+	if (txn != NULL &&
+	    (t_ret = __db_txn_auto_resolve(env, txn, 1, ret)) != 0 && ret == 0)
+		ret = t_ret;
+	return (ret);
+}
diff --git a/src/rep/rep_verify.c b/src/rep/rep_verify.c
index 5238f900..40a0dfce 100644
--- a/src/rep/rep_verify.c
+++ b/src/rep/rep_verify.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -119,8 +119,15 @@ __rep_verify(env, rp, rec, eid, savetime)
 					goto out;
 			}
 		}
+		/*
+		 * Search for a matching perm record.  If none is found,
+		 * or a database or file delete is encountered before the
+		 * perm record, begin internal init.  Database and blob file
+		 * deletes cannot be undone once committed, so internal init
+		 * must be used to re-create the files.
+		 */
 		if ((ret = __rep_log_backup(env, logc, &lsn,
-		    REP_REC_PERM)) == 0) {
+		    REP_REC_PERM_DEL)) == 0) {
 			MUTEX_LOCK(env, rep->mtx_clientdb);
 			lp->verify_lsn = lsn;
 			__os_gettime(env, &lp->rcvd_ts, 1);
@@ -205,8 +212,10 @@ __rep_internal_init(env, abbrev)
 	u_int32_t abbrev;
 {
 	REP *rep;
+	u_int32_t ctlflags;
 	int master, ret;
 
+	ctlflags = 0;
 	rep = env->rep_handle->region;
 	REP_SYSTEM_LOCK(env);
 #ifdef HAVE_STATISTICS
@@ -227,6 +236,7 @@ __rep_internal_init(env, abbrev)
 			RPRINT(env, (env, DB_VERB_REP_SYNC,
 			 "send UPDATE_REQ, merely to check for NIMDB refresh"));
 			F_SET(rep, REP_F_ABBREVIATED);
+			FLD_SET(ctlflags, REPCTL_INMEM_ONLY);
 		} else
 			F_CLR(rep, REP_F_ABBREVIATED);
 		ZERO_LSN(rep->first_lsn);
@@ -237,7 +247,7 @@ __rep_internal_init(env, abbrev)
 	REP_SYSTEM_UNLOCK(env);
 	if (ret == 0 && master != DB_EID_INVALID)
 		(void)__rep_send_message(env,
-		    master, REP_UPDATE_REQ, NULL, NULL, 0, 0);
+		    master, REP_UPDATE_REQ, NULL, NULL, ctlflags, 0);
 	return (ret);
 }
 
@@ -504,8 +514,7 @@ __rep_dorecovery(env, lsnp, trunclsnp)
 		 */
 		DB_ASSERT(env, rep->op_cnt == 0);
 		DB_ASSERT(env, rep->msg_th == 1);
-		if (rectype == DB___txn_regop || rectype == DB___txn_ckp ||
-		    rectype == DB___dbreg_register)
+		if (IS_PERM_RECTYPE(rectype) || rectype == DB___dbreg_register)
 			skip_rec = 0;
 		if (rectype == DB___txn_regop) {
 			if (rep->version >= DB_REPVERSION_44) {
@@ -653,8 +662,10 @@ __rep_verify_match(env, reclsnp, savetime)
 	/*
 	 * Lockout the API and wait for operations to complete.
 	 */
-	if ((ret = __rep_lockout_api(env, rep)) != 0)
+	if ((ret = __rep_lockout_api(env, rep)) != 0) {
+		FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG);
 		goto errunlock;
+	}
 
 	/* OK, everyone is out, we can now run recovery. */
 	REP_SYSTEM_UNLOCK(env);
@@ -690,6 +701,10 @@ __rep_verify_match(env, reclsnp, savetime)
 	 */
 	if (db_rep->rep_db == NULL &&
 	    (ret = __rep_client_dbinit(env, 0, REP_DB)) != 0) {
+		REP_SYSTEM_LOCK(env);
+		FLD_CLR(rep->lockout_flags,
+		    REP_LOCKOUT_API | REP_LOCKOUT_MSG | REP_LOCKOUT_OP);
+		REP_SYSTEM_UNLOCK(env);
 		MUTEX_UNLOCK(env, rep->mtx_clientdb);
 		goto out;
 	}
diff --git a/src/repmgr/repmgr.msg b/src/repmgr/repmgr.msg
index 020f2e9c..ba544936 100644
--- a/src/repmgr/repmgr.msg
+++ b/src/repmgr/repmgr.msg
@@ -65,6 +65,11 @@ ARG	port		u_int16_t
 END
 
 BEGIN_MSG membership_data
+ARG	status          u_int32_t
+ARG	flags           u_int32_t
+END
+
+BEGIN_MSG v4membership_data
 ARG	flags           u_int32_t
 END
 
@@ -98,22 +103,51 @@ BEGIN_MSG membr_vers
 ARG	version		u_int32_t
 ARG	gen		u_int32_t
 END
+
 BEGIN_MSG site_info check_length
 ARG	host		DBT
 ARG	port		u_int16_t
+ARG	status		u_int32_t
+ARG	flags		u_int32_t
+END
+
+BEGIN_MSG v4site_info check_length
+ARG	host		DBT
+ARG	port		u_int16_t
 ARG	flags		u_int32_t
 END
 
 /*
  * If site A breaks or rejects a connection from site B, it first
  * tries to send B this message containing site A's currently known
- * membership DB version.  Site B can use this to decide what to do.
- * If site B knows of a later version, it should retry the connection
- * to site A later, polling at it until site A catches up.  However, if
- * site B's known version is less, it means that site B is no longer in 
- * the group, and so instead it should shut down and notify the application.
+ * membership DB version and site B's status in site A's membership DB.
+ * Site B can use them to decide what to do.  If site B knows of a later
+ * version, it should retry the connection to site A later, polling
+ * until site A catches up.  However, if site B's known version is
+ * less and site B's status is "adding" in site A's membership DB, it
+ * means that a badly-timed change of master may have caused the current
+ * master to lose the update of B's status to "present", so site B should
+ * retry the connection to site A later.  Otherwise, site B is no longer
+ * in the group and it should shut down and notify the application.
  */
 BEGIN_MSG connect_reject
 ARG	version		u_int32_t
 ARG	gen		u_int32_t
+ARG	status		u_int32_t
+END
+
+BEGIN_MSG v4connect_reject
+ARG	version		u_int32_t
+ARG	gen		u_int32_t
+END
+
+/*
+ * For preferred master LSN history comparison between the sites.
+ * The next_gen_lsn is [0,0] if the next generation doesn't yet exist.
+ */
+BEGIN_MSG lsnhist_match
+ARG	lsn		DB_LSN
+ARG	hist_sec	u_int32_t
+ARG	hist_nsec	u_int32_t
+ARG	next_gen_lsn	DB_LSN
 END
diff --git a/src/repmgr/repmgr.src b/src/repmgr/repmgr.src
index 68d8c239..f42e159f 100644
--- a/src/repmgr/repmgr.src
+++ b/src/repmgr/repmgr.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2010, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2010, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 
 DBPRIVATE
diff --git a/src/repmgr/repmgr_automsg.c b/src/repmgr/repmgr_automsg.c
index 90af08ff..31bc4c35 100644
--- a/src/repmgr/repmgr_automsg.c
+++ b/src/repmgr/repmgr_automsg.c
@@ -463,6 +463,7 @@ __repmgr_membership_data_marshal(env, argp, bp)
 	__repmgr_membership_data_args *argp;
 	u_int8_t *bp;
 {
+	DB_HTONL_COPYOUT(env, bp, argp->status);
 	DB_HTONL_COPYOUT(env, bp, argp->flags);
 }
 
@@ -481,6 +482,7 @@ __repmgr_membership_data_unmarshal(env, argp, bp, max, nextp)
 {
 	if (max < __REPMGR_MEMBERSHIP_DATA_SIZE)
 		goto too_few;
+	DB_NTOHL_COPYIN(env, argp->status, bp);
 	DB_NTOHL_COPYIN(env, argp->flags, bp);
 
 	if (nextp != NULL)
@@ -494,6 +496,46 @@ too_few:
 }
 
 /*
+ * PUBLIC: void __repmgr_v4membership_data_marshal __P((ENV *,
+ * PUBLIC:	 __repmgr_v4membership_data_args *, u_int8_t *));
+ */
+void
+__repmgr_v4membership_data_marshal(env, argp, bp)
+	ENV *env;
+	__repmgr_v4membership_data_args *argp;
+	u_int8_t *bp;
+{
+	DB_HTONL_COPYOUT(env, bp, argp->flags);
+}
+
+/*
+ * PUBLIC: int __repmgr_v4membership_data_unmarshal __P((ENV *,
+ * PUBLIC:	 __repmgr_v4membership_data_args *, u_int8_t *, size_t,
+ * PUBLIC:	 u_int8_t **));
+ */
+int
+__repmgr_v4membership_data_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__repmgr_v4membership_data_args *argp;
+	u_int8_t *bp;
+	size_t max;
+	u_int8_t **nextp;
+{
+	if (max < __REPMGR_V4MEMBERSHIP_DATA_SIZE)
+		goto too_few;
+	DB_NTOHL_COPYIN(env, argp->flags, bp);
+
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __repmgr_v4membership_data message"));
+	return (EINVAL);
+}
+
+/*
  * PUBLIC: void __repmgr_member_metadata_marshal __P((ENV *,
  * PUBLIC:	 __repmgr_member_metadata_args *, u_int8_t *));
  */
@@ -669,6 +711,7 @@ __repmgr_site_info_marshal(env, argp, bp, max, lenp)
 		bp += argp->host.size;
 	}
 	DB_HTONS_COPYOUT(env, bp, argp->port);
+	DB_HTONL_COPYOUT(env, bp, argp->status);
 	DB_HTONL_COPYOUT(env, bp, argp->flags);
 
 	*lenp = (size_t)(bp - start);
@@ -702,6 +745,7 @@ __repmgr_site_info_unmarshal(env, argp, bp, max, nextp)
 		goto too_few;
 	bp += argp->host.size;
 	DB_NTOHS_COPYIN(env, argp->port, bp);
+	DB_NTOHL_COPYIN(env, argp->status, bp);
 	DB_NTOHL_COPYIN(env, argp->flags, bp);
 
 	if (nextp != NULL)
@@ -715,6 +759,75 @@ too_few:
 }
 
 /*
+ * PUBLIC: int __repmgr_v4site_info_marshal __P((ENV *,
+ * PUBLIC:	 __repmgr_v4site_info_args *, u_int8_t *, size_t, size_t *));
+ */
+int
+__repmgr_v4site_info_marshal(env, argp, bp, max, lenp)
+	ENV *env;
+	__repmgr_v4site_info_args *argp;
+	u_int8_t *bp;
+	size_t *lenp, max;
+{
+	u_int8_t *start;
+
+	if (max < __REPMGR_V4SITE_INFO_SIZE
+	    + (size_t)argp->host.size)
+		return (ENOMEM);
+	start = bp;
+
+	DB_HTONL_COPYOUT(env, bp, argp->host.size);
+	if (argp->host.size > 0) {
+		memcpy(bp, argp->host.data, argp->host.size);
+		bp += argp->host.size;
+	}
+	DB_HTONS_COPYOUT(env, bp, argp->port);
+	DB_HTONL_COPYOUT(env, bp, argp->flags);
+
+	*lenp = (size_t)(bp - start);
+	return (0);
+}
+
+/*
+ * PUBLIC: int __repmgr_v4site_info_unmarshal __P((ENV *,
+ * PUBLIC:	 __repmgr_v4site_info_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__repmgr_v4site_info_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__repmgr_v4site_info_args *argp;
+	u_int8_t *bp;
+	size_t max;
+	u_int8_t **nextp;
+{
+	size_t needed;
+
+	needed = __REPMGR_V4SITE_INFO_SIZE;
+	if (max < needed)
+		goto too_few;
+	DB_NTOHL_COPYIN(env, argp->host.size, bp);
+	if (argp->host.size == 0)
+		argp->host.data = NULL;
+	else
+		argp->host.data = bp;
+	needed += (size_t)argp->host.size;
+	if (max < needed)
+		goto too_few;
+	bp += argp->host.size;
+	DB_NTOHS_COPYIN(env, argp->port, bp);
+	DB_NTOHL_COPYIN(env, argp->flags, bp);
+
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __repmgr_v4site_info message"));
+	return (EINVAL);
+}
+
+/*
  * PUBLIC: void __repmgr_connect_reject_marshal __P((ENV *,
  * PUBLIC:	 __repmgr_connect_reject_args *, u_int8_t *));
  */
@@ -726,6 +839,7 @@ __repmgr_connect_reject_marshal(env, argp, bp)
 {
 	DB_HTONL_COPYOUT(env, bp, argp->version);
 	DB_HTONL_COPYOUT(env, bp, argp->gen);
+	DB_HTONL_COPYOUT(env, bp, argp->status);
 }
 
 /*
@@ -744,6 +858,7 @@ __repmgr_connect_reject_unmarshal(env, argp, bp, max, nextp)
 		goto too_few;
 	DB_NTOHL_COPYIN(env, argp->version, bp);
 	DB_NTOHL_COPYIN(env, argp->gen, bp);
+	DB_NTOHL_COPYIN(env, argp->status, bp);
 
 	if (nextp != NULL)
 		*nextp = bp;
@@ -755,3 +870,94 @@ too_few:
 	return (EINVAL);
 }
 
+/*
+ * PUBLIC: void __repmgr_v4connect_reject_marshal __P((ENV *,
+ * PUBLIC:	 __repmgr_v4connect_reject_args *, u_int8_t *));
+ */
+void
+__repmgr_v4connect_reject_marshal(env, argp, bp)
+	ENV *env;
+	__repmgr_v4connect_reject_args *argp;
+	u_int8_t *bp;
+{
+	DB_HTONL_COPYOUT(env, bp, argp->version);
+	DB_HTONL_COPYOUT(env, bp, argp->gen);
+}
+
+/*
+ * PUBLIC: int __repmgr_v4connect_reject_unmarshal __P((ENV *,
+ * PUBLIC:	 __repmgr_v4connect_reject_args *, u_int8_t *, size_t,
+ * PUBLIC:	 u_int8_t **));
+ */
+int
+__repmgr_v4connect_reject_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__repmgr_v4connect_reject_args *argp;
+	u_int8_t *bp;
+	size_t max;
+	u_int8_t **nextp;
+{
+	if (max < __REPMGR_V4CONNECT_REJECT_SIZE)
+		goto too_few;
+	DB_NTOHL_COPYIN(env, argp->version, bp);
+	DB_NTOHL_COPYIN(env, argp->gen, bp);
+
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __repmgr_v4connect_reject message"));
+	return (EINVAL);
+}
+
+/*
+ * PUBLIC: void __repmgr_lsnhist_match_marshal __P((ENV *,
+ * PUBLIC:	 __repmgr_lsnhist_match_args *, u_int8_t *));
+ */
+void
+__repmgr_lsnhist_match_marshal(env, argp, bp)
+	ENV *env;
+	__repmgr_lsnhist_match_args *argp;
+	u_int8_t *bp;
+{
+	DB_HTONL_COPYOUT(env, bp, argp->lsn.file);
+	DB_HTONL_COPYOUT(env, bp, argp->lsn.offset);
+	DB_HTONL_COPYOUT(env, bp, argp->hist_sec);
+	DB_HTONL_COPYOUT(env, bp, argp->hist_nsec);
+	DB_HTONL_COPYOUT(env, bp, argp->next_gen_lsn.file);
+	DB_HTONL_COPYOUT(env, bp, argp->next_gen_lsn.offset);
+}
+
+/*
+ * PUBLIC: int __repmgr_lsnhist_match_unmarshal __P((ENV *,
+ * PUBLIC:	 __repmgr_lsnhist_match_args *, u_int8_t *, size_t, u_int8_t **));
+ */
+int
+__repmgr_lsnhist_match_unmarshal(env, argp, bp, max, nextp)
+	ENV *env;
+	__repmgr_lsnhist_match_args *argp;
+	u_int8_t *bp;
+	size_t max;
+	u_int8_t **nextp;
+{
+	if (max < __REPMGR_LSNHIST_MATCH_SIZE)
+		goto too_few;
+	DB_NTOHL_COPYIN(env, argp->lsn.file, bp);
+	DB_NTOHL_COPYIN(env, argp->lsn.offset, bp);
+	DB_NTOHL_COPYIN(env, argp->hist_sec, bp);
+	DB_NTOHL_COPYIN(env, argp->hist_nsec, bp);
+	DB_NTOHL_COPYIN(env, argp->next_gen_lsn.file, bp);
+	DB_NTOHL_COPYIN(env, argp->next_gen_lsn.offset, bp);
+
+	if (nextp != NULL)
+		*nextp = bp;
+	return (0);
+
+too_few:
+	__db_errx(env, DB_STR("3675",
+	    "Not enough input bytes to fill a __repmgr_lsnhist_match message"));
+	return (EINVAL);
+}
+
diff --git a/src/repmgr/repmgr_elect.c b/src/repmgr/repmgr_elect.c
index 3a84694a..15a2de7b 100644
--- a/src/repmgr/repmgr_elect.c
+++ b/src/repmgr/repmgr_elect.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -12,9 +12,9 @@
 
 static db_timeout_t __repmgr_compute_response_time __P((ENV *));
 static int __repmgr_elect __P((ENV *, u_int32_t, db_timespec *));
-static int __repmgr_elect_main __P((ENV *, REPMGR_RUNNABLE *));
+static int __repmgr_elect_main __P((ENV *,
+    DB_THREAD_INFO *, REPMGR_RUNNABLE *));
 static void *__repmgr_elect_thread __P((void *));
-static int send_membership __P((ENV *));
 
 /*
  * Starts an election thread.
@@ -90,26 +90,39 @@ __repmgr_elect_thread(argsp)
 {
 	REPMGR_RUNNABLE *th;
 	ENV *env;
+	DB_THREAD_INFO *ip;
 	int ret;
 
 	th = argsp;
 	env = th->env;
+	ip = NULL;
+	ret = 0;
 
-	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "starting election thread"));
+	ENV_ENTER_RET(env, ip, ret);
+	if (ret == 0)
+		RPRINT(env, (env,
+		    DB_VERB_REPMGR_MISC, "starting election thread"));
 
-	if ((ret = __repmgr_elect_main(env, th)) != 0) {
+	if (ret != 0 || (ret = __repmgr_elect_main(env, ip, th)) != 0) {
 		__db_err(env, ret, "election thread failed");
+		RPRINT(env, (env,
+		    DB_VERB_REPMGR_MISC, "election thread is exiting"));
+		ENV_LEAVE(env, ip);
 		(void)__repmgr_thread_failure(env, ret);
 	}
-
-	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "election thread is exiting"));
+	if (ret == 0) {
+		RPRINT(env, (env,
+		    DB_VERB_REPMGR_MISC, "election thread is exiting"));
+		ENV_LEAVE(env, ip);
+	}
 	th->finished = TRUE;
 	return (NULL);
 }
 
 static int
-__repmgr_elect_main(env, th)
+__repmgr_elect_main(env, ip, th)
 	ENV *env;
+	DB_THREAD_INFO *ip;
 	REPMGR_RUNNABLE *th;
 {
 	DB_REP *db_rep;
@@ -123,10 +136,13 @@ __repmgr_elect_main(env, th)
 	db_timespec failtime, now, repstart_time, target, wait_til;
 	db_timeout_t delay_time, response_time, tmp_time;
 	u_long sec, usec;
-	u_int32_t flags;
-	int done_repstart, ret, suppress_election;
+	u_int32_t flags, max_tries, tries;
+	int client_detected, done_repstart, lsnhist_match, master_detected;
+	int ret, suppress_election;
 	enum { ELECTION, REPSTART } action;
 
+	COMPQUIET(usec, 0);
+	COMPQUIET(max_tries, 0);
 	COMPQUIET(action, ELECTION);
 
 	db_rep = env->rep_handle;
@@ -181,6 +197,120 @@ __repmgr_elect_main(env, th)
 	UNLOCK_MUTEX(db_rep->mutex);
 
 	/*
+	 * In preferred master mode, the select thread signals when a
+	 * client has lost its connection to the master via prefmas_pending,
+	 * but the actual restart as temporary master is done here in an
+	 * election thread.
+	 */
+	if (IS_PREFMAS_MODE(env) && F_ISSET(rep, REP_F_CLIENT) &&
+	    db_rep->prefmas_pending == start_temp_master) {
+		db_rep->prefmas_pending = no_action;
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "elect_main preferred master restart temp master"));
+		ret = __repmgr_become_master(env, 0);
+		goto out;
+	}
+
+	/* Get preferred master wait limits for detecting the other site. */
+	if (IS_PREFMAS_MODE(env) &&
+	    (ret = __repmgr_prefmas_get_wait(env, &max_tries, &usec)) != 0)
+		goto out;
+
+	/* Preferred master mode master site start-up. */
+	if (IS_PREFMAS_MODE(env) &&
+	    FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER) &&
+	    LF_ISSET(ELECT_F_STARTUP)) {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "elect_main preferred master site startup"));
+		client_detected = FALSE;
+		lsnhist_match = FALSE;
+		tries = 0;
+		while (!client_detected && tries < max_tries) {
+			__os_yield(env, 0, usec);
+			tries++;
+			client_detected = __repmgr_prefmas_connected(env);
+		}
+		if (client_detected) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "elect_main preferred master client detected"));
+			/*
+			 * Restart remote site as a client.  Depending on the
+			 * outcome of lsnhist_match below, this site will
+			 * either restart as master or it will start an
+			 * election.  In either case, the remote site should
+			 * be running as a client.
+			 *
+			 * Then perform the lsnhist_match comparison.
+			 */
+			if ((ret = __repmgr_restart_site_as_client(
+			    env, 1)) != 0 ||
+			    (ret = __repmgr_lsnhist_match(env,
+			    ip, 1, &lsnhist_match)) != 0)
+				goto out;
+			/*
+			 * An lsnhist_match means that we have a continuous
+			 * set of transactions and it is safe to call a
+			 * comparison election to preserve any temporary master
+			 * transactions that were committed while this site
+			 * was down.
+			 */
+			if (lsnhist_match) {
+				F_CLR(rep, REP_F_HOLD_GEN);
+				LF_SET(ELECT_F_IMMED);
+				LF_CLR(ELECT_F_STARTUP);
+				/* Continue on to election code below. */
+			}
+		}
+		/*
+		 * If we didn't detect a client within a reasonable time or
+		 * we failed the lsnhist_match (meaning we have conflicting
+		 * sets of transactions), we start this site as a master and
+		 * possibly force rollback of temporary master transactions.
+		 */
+		if (!client_detected || !lsnhist_match) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "elect_main preferred master site start master"));
+			ret = __repmgr_become_master(env, 0);
+			F_CLR(rep, REP_F_HOLD_GEN);
+			goto out;
+		}
+	}
+
+	/* Preferred master mode client site start-up. */
+	if (IS_PREFMAS_MODE(env) &&
+	    FLD_ISSET(rep->config, REP_C_PREFMAS_CLIENT) &&
+	    LF_ISSET(ELECT_F_STARTUP)) {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "elect_main preferred master client site startup"));
+		master_detected = FALSE;
+		tries = 0;
+		while (!master_detected && tries < max_tries) {
+			__os_yield(env, 0, usec);
+			tries++;
+			master_detected = __repmgr_prefmas_connected(env);
+		}
+		/*
+		 * If we find the master, restart as client here so that we
+		 * send a newclient message after we are connected to the
+		 * master.  The master will send a newmaster message so that
+		 * we can start the client sync process.
+		 *
+		 * If we haven't found the master after the timeout, start as
+		 * temporary master.
+		 */
+		if (master_detected) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "elect_main preferred master detected"));
+			ret = __repmgr_become_client(env);
+		} else {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "elect_main preferred master client start master"));
+			ret = __repmgr_become_master(env, 0);
+		}
+		goto out;
+	}
+
+	/*
 	 * The 'done_repstart' flag keeps track of which was our most recent
 	 * operation (repstart or election), so that we can alternate
 	 * appropriately.  There are a few different ways this thread can be
@@ -188,7 +318,7 @@ __repmgr_elect_main(env, th)
 	 * called.  The one exception is at initial start-up, where we
 	 * first probe for a master by sending out rep_start(CLIENT) calls.
 	 */
-	if (LF_ISSET(ELECT_F_IMMED)) {
+	if (LF_ISSET(ELECT_F_IMMED) && !IS_VIEW_SITE(env)) {
 		/*
 		 * When the election succeeds, we've successfully completed
 		 * everything we need to do.  If it fails in an unexpected way,
@@ -256,11 +386,13 @@ __repmgr_elect_main(env, th)
 		/*
 		 * See if it's time to retry the operation.  Normally it's an
 		 * election we're interested in retrying.  But we refrain from
-		 * calling for elections if so configured.
+		 * calling for elections if so configured or we are a view.
 		 */
-		suppress_election = LF_ISSET(ELECT_F_STARTUP) ?
+		suppress_election = IS_VIEW_SITE(env) ||
+		    (LF_ISSET(ELECT_F_STARTUP) ?
 		    db_rep->init_policy == DB_REP_CLIENT :
-		    !FLD_ISSET(rep->config, REP_C_ELECTIONS);
+		    !FLD_ISSET(rep->config, REP_C_ELECTIONS)) ||
+		    LF_ISSET(ELECT_F_CLIENT_RESTART);
 		repstart_time = db_rep->repstart_time;
 		target = suppress_election ? repstart_time : failtime;
 		TIMESPEC_ADD_DB_TIMEOUT(&target, rep->election_retry_wait);
@@ -343,7 +475,8 @@ __repmgr_elect_main(env, th)
 			DB_ASSERT(env, action == REPSTART);
 
 			db_rep->new_connection = FALSE;
-			if ((ret = __repmgr_repstart(env, DB_REP_CLIENT)) != 0)
+			if ((ret = __repmgr_repstart(env,
+			    DB_REP_CLIENT, 0)) != 0)
 				goto out;
 			done_repstart = TRUE;
 
@@ -476,7 +609,20 @@ __repmgr_elect(env, flags, failtimep)
 	case DB_REP_UNAVAIL:
 		__os_gettime(env, failtimep, 1);
 		DB_EVENT(env, DB_EVENT_REP_ELECTION_FAILED, NULL);
-		if ((t_ret = send_membership(env)) != 0)
+		/*
+		 * If an election fails with DB_REP_UNAVAIL, it could be
+		 * because a participating site has an obsolete, too-high
+		 * notion of the group size.  (This could happen if the site
+		 * was down/disconnected during removal of some (other) sites.)
+		 * To remedy this, broadcast a current copy of the membership
+		 * list.  Since all sites are doing this, and we always ratchet
+		 * to the most up-to-date version, this should bring all sites
+		 * up to date.  We only do this after a failure, during what
+		 * will normally be an idle period anyway, so that we don't
+		 * slow down a first election following the loss of an active
+		 * master.
+		 */
+		if ((t_ret = __repmgr_bcast_member_list(env)) != 0)
 			ret = t_ret;
 		break;
 
@@ -498,40 +644,6 @@ __repmgr_elect(env, flags, failtimep)
 }
 
 /*
- * If an election fails with DB_REP_UNAVAIL, it could be because a participating
- * site has an obsolete, too-high notion of the group size.  (This could happen
- * if the site was down/disconnected during removal of some (other) sites.)  To
- * remedy this, broadcast a current copy of the membership list.  Since all
- * sites are doing this, and we always ratchet to the most up-to-date version,
- * this should bring all sites up to date.  We only do this after a failure,
- * during what will normally be an idle period anyway, so that we don't slow
- * down a first election following the loss of an active master.
- */
-static int
-send_membership(env)
-	ENV *env;
-{
-	DB_REP *db_rep;
-	u_int8_t *buf;
-	size_t len;
-	int ret;
-
-	db_rep = env->rep_handle;
-	buf = NULL;
-	LOCK_MUTEX(db_rep->mutex);
-	if ((ret = __repmgr_marshal_member_list(env, &buf, &len)) != 0)
-		goto out;
-	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
-	    "Broadcast latest membership list"));
-	ret = __repmgr_bcast_own_msg(env, REPMGR_SHARING, buf, len);
-out:
-	UNLOCK_MUTEX(db_rep->mutex);
-	if (buf != NULL)
-		__os_free(env, buf);
-	return (ret);
-}
-
-/*
  * Becomes master after we've won an election, if we can.
  *
  * PUBLIC: int __repmgr_claim_victory __P((ENV *));
@@ -543,7 +655,7 @@ __repmgr_claim_victory(env)
 	int ret;
 
 	env->rep_handle->takeover_pending = FALSE;
-	if ((ret = __repmgr_become_master(env)) == DB_REP_UNAVAIL) {
+	if ((ret = __repmgr_become_master(env, 0)) == DB_REP_UNAVAIL) {
 		ret = 0;
 		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 		    "Won election but lost race with DUPMASTER client intent"));
diff --git a/src/repmgr/repmgr_method.c b/src/repmgr/repmgr_method.c
index 229cf650..729ba5ff 100644
--- a/src/repmgr/repmgr_method.c
+++ b/src/repmgr/repmgr_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -29,19 +29,17 @@ static int get_channel_connection __P((CHANNEL *, REPMGR_CONNECTION **));
 static int init_dbsite __P((ENV *, int, const char *, u_int, DB_SITE **));
 static int join_group_at_site __P((ENV *, repmgr_netaddr_t *));
 static int kick_blockers __P((ENV *, REPMGR_CONNECTION *, void *));
-static int make_request_conn __P((ENV *,
-    repmgr_netaddr_t *, REPMGR_CONNECTION **));
 static int set_local_site __P((DB_SITE *, u_int32_t));
-static int read_own_msg __P((ENV *,
-    REPMGR_CONNECTION *, u_int32_t *, u_int8_t **, size_t *));
 static int refresh_site __P((DB_SITE *));
 static int __repmgr_await_threads __P((ENV *));
 static int __repmgr_build_data_out __P((ENV *,
     DBT *, u_int32_t, __repmgr_msg_metadata_args *, REPMGR_IOVECS **iovecsp));
 static int __repmgr_build_msg_out __P((ENV *,
     DBT *, u_int32_t, __repmgr_msg_metadata_args *, REPMGR_IOVECS **iovecsp));
+static int __repmgr_demote_site __P((ENV *, int));
 static int repmgr_only __P((ENV *, const char *));
 static int __repmgr_restart __P((ENV *, int, u_int32_t));
+static int __repmgr_remove_and_close_site __P((DB_SITE *));
 static int __repmgr_remove_site __P((DB_SITE *));
 static int __repmgr_remove_site_pp __P((DB_SITE *));
 static int __repmgr_start_msg_threads __P((ENV *, u_int));
@@ -52,25 +50,21 @@ static int send_msg_self __P((ENV *, REPMGR_IOVECS *, u_int32_t));
 static int site_by_addr __P((ENV *, const char *, u_int, DB_SITE **));
 
 /*
- * PUBLIC: int __repmgr_start __P((DB_ENV *, int, u_int32_t));
+ * PUBLIC: int __repmgr_start_pp __P((DB_ENV *, int, u_int32_t));
  */
 int
-__repmgr_start(dbenv, nthreads, flags)
+__repmgr_start_pp(dbenv, nthreads, flags)
 	DB_ENV *dbenv;
 	int nthreads;
 	u_int32_t flags;
 {
 	DB_REP *db_rep;
-	REP *rep;
-	REPMGR_SITE *me, *site;
-	DB_THREAD_INFO *ip;
 	ENV *env;
-	int first, is_listener, locked, min, need_masterseek, ret, start_master;
-	u_int i, n;
+	DB_THREAD_INFO *ip;
+	int ret;
 
 	env = dbenv->env;
 	db_rep = env->rep_handle;
-	rep = db_rep->region;
 
 	switch (flags) {
 	case 0:
@@ -102,7 +96,27 @@ __repmgr_start(dbenv, nthreads, flags)
 		return (EINVAL);
 	}
 
-	/* Check if it is a shut-down site, if so, clean the resources. */
+	/* A view site cannot be started as MASTER or ELECTION. */
+	if (IS_VIEW_SITE(env) &&
+	    (flags == DB_REP_MASTER || flags == DB_REP_ELECTION)) {
+		__db_errx(env, DB_STR("3694",
+		    "A view site must be started with DB_REP_CLIENT"));
+		return (EINVAL);
+	}
+
+	/* Must start site as client in preferred master mode. */
+	if (PREFMAS_IS_SET(env) &&
+	    (flags == DB_REP_MASTER || flags == DB_REP_ELECTION)) {
+		__db_errx(env, DB_STR("3702",
+		    "A preferred master site must be started with "
+		    "DB_REP_CLIENT"));
+		return (EINVAL);
+	}
+
+	/*
+	 * Check if it is a shut-down site, if so, clean the resources and
+	 * reset the status in order to get ready to start replication.
+	 */
 	if (db_rep->repmgr_status == stopped) {
 		if ((ret = __repmgr_stop(env)) != 0) {
 			__db_errx(env, DB_STR("3638",
@@ -112,7 +126,55 @@ __repmgr_start(dbenv, nthreads, flags)
 		db_rep->repmgr_status = ready;
 	}
 
+	/* Record the original configurations given by application. */
+	ENV_ENTER(env, ip);
 	db_rep->init_policy = flags;
+	db_rep->config_nthreads = nthreads;
+	ret = __repmgr_start_int(env, nthreads, flags);
+	ENV_LEAVE(env, ip);
+	return (ret);
+}
+
+/*
+ * Internal processing to start replication manager.
+ *
+ * PUBLIC: int __repmgr_start_int __P((ENV *, int, u_int32_t));
+ */
+int
+__repmgr_start_int(env, nthreads, flags)
+	ENV *env;
+	int nthreads;
+	u_int32_t flags;
+{
+	DB_LOG *dblp;
+	DB_REP *db_rep;
+	LOG *lp;
+	REP *rep;
+	REPMGR_SITE *me, *site;
+	u_int32_t startopts;
+	int first, flags_error, is_listener, locked, min;
+	int need_masterseek, ret, start_master;
+	u_int i, n;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	flags_error = 0;
+	startopts = 0;
+
+	/*
+	 * For preferred master master site startup, we need to save the
+	 * log location at the end of our previous transactions for
+	 * the lsnhist_match comparisons.  Starting repmgr adds a few
+	 * more log records that we don't want to count in lsnhist_match.
+	 */
+	if (FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER)) {
+		LOG_SYSTEM_LOCK(env);
+		db_rep->prefmas_init_lsn = lp->lsn;
+		LOG_SYSTEM_UNLOCK(env);
+	}
+
 	if ((ret = __rep_set_transport_int(env,
 	    db_rep->self_eid, __repmgr_send)) != 0)
 		return (ret);
@@ -128,7 +190,8 @@ __repmgr_start(dbenv, nthreads, flags)
 
 	if (db_rep->restored_list != NULL) {
 		ret = __repmgr_refresh_membership(env,
-		    db_rep->restored_list, db_rep->restored_list_length);
+		    db_rep->restored_list, db_rep->restored_list_length,
+		    DB_REPMGR_VERSION);
 		__os_free(env, db_rep->restored_list);
 		db_rep->restored_list = NULL;
 	} else {
@@ -145,9 +208,15 @@ __repmgr_start(dbenv, nthreads, flags)
 				 * join.
 				 */
 				ret = __repmgr_join_group(env);
+			else if (VIEW_TO_PARTICIPANT(db_rep, me)) {
+				__db_errx(env, DB_STR("3695",
+    "A view site must be started with a view callback"));
+				return (EINVAL);
+			}
 		} else if (ret == ENOENT) {
-			ENV_ENTER(env, ip);
-			if (FLD_ISSET(me->config, DB_GROUP_CREATOR))
+			if (FLD_ISSET(me->config, DB_GROUP_CREATOR) ||
+			    (IS_PREFMAS_MODE(env) &&
+			    FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER)))
 				start_master = TRUE;
 			/*
 			 * LEGACY is inconsistent with CREATOR, but start_master
@@ -166,10 +235,12 @@ __repmgr_start(dbenv, nthreads, flags)
 						continue;
 					if ((ret = __repmgr_set_membership(env,
 					    site->net_addr.host,
-					    site->net_addr.port,
-					    SITE_PRESENT)) != 0)
+					    site->net_addr.port, SITE_PRESENT,
+					    site->gmdb_flags)) != 0)
 						break;
-					n++;
+					if (!FLD_ISSET(site->gmdb_flags,
+					    SITE_VIEW))
+						n++;
 				}
 				ret = __rep_set_nsites_int(env, n);
 				DB_ASSERT(env, ret == 0);
@@ -180,30 +251,27 @@ __repmgr_start(dbenv, nthreads, flags)
 				db_rep->member_version_gen = 1;
 				if ((ret = __repmgr_set_membership(env,
 				    me->net_addr.host, me->net_addr.port,
-				    SITE_PRESENT)) == 0) {
+				    SITE_PRESENT, 0)) == 0) {
 					ret = __rep_set_nsites_int(env, 1);
 					DB_ASSERT(env, ret == 0);
 				}
 				UNLOCK_MUTEX(db_rep->mutex);
 			} else
 				ret = __repmgr_join_group(env);
-			ENV_LEAVE(env, ip);
 		} else if (ret == DB_DELETED)
 			ret = DB_REP_UNAVAIL;
 	}
 	if (ret != 0)
 		return (ret);
 
-	DB_ASSERT(env, start_master ||
-	    SITE_FROM_EID(db_rep->self_eid)->membership == SITE_PRESENT);
-
 	/*
-	 * If we're the first repmgr_start() call, we will have to start threads.
-	 * Therefore, we require a flags value (to tell us how).
+	 * Catch case where user defines a different local site address than
+	 * the one in the restored_list from an ongoing internal init.
 	 */
-	if (db_rep->repmgr_status != running && flags == 0) {
-		__db_errx(env, DB_STR("3639",
-	"a non-zero flags value is required for initial repmgr_start() call"));
+	if (!start_master &&
+	    SITE_FROM_EID(db_rep->self_eid)->membership != SITE_PRESENT) {
+		__db_errx(env, DB_STR("3696",
+		    "Current local site conflicts with earlier definition"));
 		return (EINVAL);
 	}
 
@@ -214,37 +282,54 @@ __repmgr_start(dbenv, nthreads, flags)
 	 *
 	 * Then, in case there could be multiple processes, we're either the
 	 * main listener process or a subordinate process.  On a "subsequent"
-	 * repmgr_start() call we already have enough information to know which
-	 * it is.  Otherwise, negotiate with information in the shared region to
-	 * claim the listener role if possible.
+	 * repmgr_start() call, with a running main listener process, we already
+	 * have enough information to know which it is.  Otherwise, if there is
+	 * no listener, negotiate with information in the shared region to claim
+	 * the listener role if possible.  Once we decide we're the listener,
+	 * mark the listener id in the shared region, so that no other process
+	 * thinks the same thing.
 	 *
 	 * To avoid a race, once we decide we're in the first call, mark the
 	 * handle as started, so that no other thread thinks the same thing.
 	 */
+	first = FALSE;
+	is_listener = FALSE;
 	LOCK_MUTEX(db_rep->mutex);
 	locked = TRUE;
-	if (db_rep->repmgr_status == running) {
-		first = FALSE;
+	if (db_rep->repmgr_status == running && !(rep->listener == 0 &&
+	    FLD_ISSET(rep->config, REP_C_AUTOTAKEOVER)))
 		is_listener = !IS_SUBORDINATE(db_rep);
-	} else {
+	else if (db_rep->repmgr_status != running &&
+	    rep->listener == 0 && flags == 0)
+		flags_error = 1;
+	else {
 		first = TRUE;
 		db_rep->repmgr_status = running;
 
-		ENV_ENTER(env, ip);
 		MUTEX_LOCK(env, rep->mtx_repmgr);
 		if (rep->listener == 0) {
 			is_listener = TRUE;
-			__os_id(dbenv, &rep->listener, NULL);
-		} else {
-			is_listener = FALSE;
+			__os_id(env->dbenv, &rep->listener, NULL);
+		} else
 			nthreads = 0;
-		}
 		MUTEX_UNLOCK(env, rep->mtx_repmgr);
-		ENV_LEAVE(env, ip);
 	}
 	UNLOCK_MUTEX(db_rep->mutex);
 	locked = FALSE;
 
+	/*
+	 * The first repmgr_start() call for the main listener process
+	 * requires a flags value to tell us how to start up the site.
+	 * But we don't require a flags value for the repmgr_start()
+	 * call for a subordinate process because the site is already
+	 * started and we would only ignore the value anyway.
+	 */
+	if (flags_error) {
+		__db_errx(env, DB_STR("3639",
+	"A non-zero flags value is required for initial repmgr_start() call"));
+		return (EINVAL);
+	}
+
 	if (!first) {
 		/*
 		 * Subsequent call is allowed when ELECTIONS are turned off, so
@@ -266,7 +351,7 @@ __repmgr_start(dbenv, nthreads, flags)
 
 	/*
 	 * The minimum legal number of threads is either 1 or 0, depending upon
-	 * whether we're the main process or a subordinate.
+	 * whether we're the listener process or a subordinate.
 	 */
 	min = is_listener ? 1 : 0;
 	if (nthreads < min) {
@@ -303,14 +388,24 @@ __repmgr_start(dbenv, nthreads, flags)
 		 * of rep_start calls even within an env region lifetime.
 		 */
 		if (start_master) {
-			ret = __repmgr_become_master(env);
+			ret = __repmgr_become_master(env, 0);
 			/* No other repmgr threads running yet. */
 			DB_ASSERT(env, ret != DB_REP_UNAVAIL);
 			if (ret != 0)
 				goto err;
 			need_masterseek = FALSE;
 		} else {
-			if ((ret = __repmgr_repstart(env, DB_REP_CLIENT)) != 0)
+			/*
+			 * The preferred master site cannot allow its gen
+			 * to change until it has done its lsnhist_match to
+			 * guarantee that no preferred master transactions
+			 * will be rolled back.
+			 */
+			if (IS_PREFMAS_MODE(env) &&
+			    FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER))
+				startopts = REP_START_HOLD_CLIGEN;
+			if ((ret = __repmgr_repstart(env,
+			    DB_REP_CLIENT, startopts)) != 0)
 				goto err;
 			/*
 			 * The repmgr election code starts elections only if
@@ -352,6 +447,7 @@ __repmgr_start(dbenv, nthreads, flags)
 		if ((ret =
 		    __repmgr_start_msg_threads(env, (u_int)nthreads)) != 0)
 			goto err;
+		rep->listener_nthreads = (u_int)nthreads;
 
 		if (need_masterseek) {
 			/*
@@ -374,10 +470,47 @@ __repmgr_start(dbenv, nthreads, flags)
 		}
 		UNLOCK_MUTEX(db_rep->mutex);
 		locked = FALSE;
+		/*
+		 * Turn on the DB_EVENT_REP_INQUEUE_FULL event firing.  We only
+		 * do this for the main listener process.  For a subordinate
+		 * process, it is always turned on.
+		 */
+		rep->inqueue_full_event_on = 1;
+	}
+	if (db_rep->selector == NULL) {
+		/* All processes (even non-listeners) need a select() thread. */
+		if ((ret = __repmgr_start_selector(env)) == 0) {
+			/*
+			 * A view callback is set but this site isn't yet a
+			 * view in the internal site list.  Do the view
+			 * demotion here, which will update the internal
+			 * site list.  We need the select() thread for the
+			 * demotion because the demotion performs gmdb
+			 * operations.
+			 */
+			if (PARTICIPANT_TO_VIEW(db_rep,
+			    SITE_FROM_EID(db_rep->self_eid)) &&
+			    (ret = __repmgr_demote_site(env,
+			    db_rep->self_eid)) != 0)
+				goto err;
+			return (is_listener ? 0 : DB_REP_IGNORE);
+		}
+	} else {
+		/*
+		 * If the selector thread already exists, the current process
+		 * should be the new listener which has just finished a
+		 * takeover.  Now, all active connections need to be refreshed
+		 * to notify remote sites about the new listener.  If a new
+		 * connection is established immediately, disable the existing
+		 * main connection to the same site.  Otherwise, schedule a
+		 * second immediate attempt.  If it still fails, disable the
+		 * main connection and retry a connection as usual.
+		 */
+		DB_ASSERT(env, is_listener &&
+		    FLD_ISSET(rep->config, REP_C_AUTOTAKEOVER));
+		if ((ret = __repmgr_refresh_selector(env)) == 0)
+			return (0);
 	}
-	/* All processes (even non-listeners) need a select() thread. */
-	if ((ret = __repmgr_start_selector(env)) == 0)
-		return (is_listener ? 0 : DB_REP_IGNORE);
 
 err:
 	/* If we couldn't succeed at everything, undo the parts we did do. */
@@ -392,6 +525,16 @@ err:
 	if (!locked)
 		LOCK_MUTEX(db_rep->mutex);
 	(void)__repmgr_net_close(env);
+	/* Reset the listener when we fail before having a valid listen_fd. */
+	if (first && is_listener)
+		rep->listener = 0;
+	/*
+	 * Reset repmgr_status when we fail before starting a selector if the
+	 * earlier call to __repmgr_stop_threads() hasn't already reset it to
+	 * stopped.
+	 */
+	if (db_rep->repmgr_status == running)
+		db_rep->repmgr_status = ready;
 	UNLOCK_MUTEX(db_rep->mutex);
 	return (ret);
 }
@@ -425,6 +568,53 @@ __repmgr_valid_config(env, flags)
 }
 
 /*
+ * Set priority, heartbeat and election_retry timeouts for preferred master
+ * mode.  Turn on 2SITE_STRICT and ELECTIONS.  Can be called whether or not
+ * REP_ON() is true.
+ *
+ * PUBLIC: int __repmgr_prefmas_auto_config __P((DB_ENV *, u_int32_t *));
+ */
+int
+__repmgr_prefmas_auto_config(dbenv, config_flags)
+	DB_ENV *dbenv;
+	u_int32_t *config_flags;
+{
+	ENV *env;
+	db_timeout_t timeout;
+	int ret;
+
+	env = dbenv->env;
+	timeout = 0;
+
+	/* Set each heartbeat timeout only if it is still 0 (not yet set). */
+	if ((ret = __rep_get_timeout(dbenv,
+	    DB_REP_HEARTBEAT_MONITOR, &timeout)) == 0 && timeout == 0 &&
+	    (ret = __rep_set_timeout_int(env, DB_REP_HEARTBEAT_MONITOR,
+	    DB_REPMGR_PREFMAS_HEARTBEAT_MONITOR)) != 0)
+		return (ret);
+	if ((ret = __rep_get_timeout(dbenv,
+	    DB_REP_HEARTBEAT_SEND, &timeout)) == 0 &&
+	    timeout == 0 && (ret = __rep_set_timeout_int(env,
+	    DB_REP_HEARTBEAT_SEND, DB_REPMGR_PREFMAS_HEARTBEAT_SEND)) != 0)
+		return (ret);
+
+	/* Change election_retry timeout if it is still the default value. */
+	if ((ret = __rep_get_timeout(dbenv,
+	    DB_REP_ELECTION_RETRY, &timeout)) == 0 &&
+	    timeout == DB_REPMGR_DEFAULT_ELECTION_RETRY &&
+	    (ret = __rep_set_timeout_int(env,
+	    DB_REP_ELECTION_RETRY, DB_REPMGR_PREFMAS_ELECTION_RETRY)) != 0)
+		return (ret);
+
+	if ((ret = __rep_set_priority_int(env, FLD_ISSET(*config_flags,
+	    REP_C_PREFMAS_MASTER) ? DB_REPMGR_PREFMAS_PRIORITY_MASTER :
+	    DB_REPMGR_PREFMAS_PRIORITY_CLIENT)) != 0)
+		return (ret);
+	FLD_SET(*config_flags, REP_C_ELECTIONS | REP_C_2SITE_STRICT);
+	return (0);
+}
+
+/*
  * Starts message processing threads.  On entry, the actual number of threads
  * already active is db_rep->nthreads; the desired number of threads is passed
  * as "n".
@@ -473,7 +663,7 @@ __repmgr_restart(env, nthreads, flags)
 	REP *rep;
 	REPMGR_RUNNABLE **th;
 	u_int32_t cur_repflags;
-	int locked, ret, t_ret;
+	int locked, ret, role_change, t_ret;
 	u_int delta, i, min, nth;
 
 	th = NULL;
@@ -491,6 +681,7 @@ __repmgr_restart(env, nthreads, flags)
 	}
 
 	ret = 0;
+	role_change = 0;
 	db_rep = env->rep_handle;
 	DB_ASSERT(env, REP_ON(env));
 	rep = db_rep->region;
@@ -498,11 +689,14 @@ __repmgr_restart(env, nthreads, flags)
 	cur_repflags = F_ISSET(rep, REP_F_MASTER | REP_F_CLIENT);
 	DB_ASSERT(env, cur_repflags);
 	if (FLD_ISSET(cur_repflags, REP_F_MASTER) &&
-	    flags == DB_REP_CLIENT)
+	    flags == DB_REP_CLIENT) {
 		ret = __repmgr_become_client(env);
-	else if (FLD_ISSET(cur_repflags, REP_F_CLIENT) &&
-	    flags == DB_REP_MASTER)
-		ret = __repmgr_become_master(env);
+		role_change = 1;
+	} else if (FLD_ISSET(cur_repflags, REP_F_CLIENT) &&
+	    flags == DB_REP_MASTER) {
+		ret = __repmgr_become_master(env, 0);
+		role_change = 1;
+	}
 	if (ret != 0)
 		return (ret);
 
@@ -574,6 +768,9 @@ __repmgr_restart(env, nthreads, flags)
 		}
 		__os_free(env, th);
 	}
+	/* We will always turn on the inqueue full event after role change. */
+	if (role_change)
+		rep->inqueue_full_event_on = 1;
 
 out:	if (locked)
 		UNLOCK_MUTEX(db_rep->mutex);
@@ -668,7 +865,8 @@ __repmgr_start_selector(env)
  * PUBLIC: int __repmgr_close __P((ENV *));
  *
  * Close repmgr during env close.  It stops repmgr, frees sites array and
- * its addresses.
+ * its addresses.  Note that it is possible for the sites array to exist
+ * and require deallocation independently of whether repmgr was started.
  */
 int
 __repmgr_close(env)
@@ -679,10 +877,15 @@ __repmgr_close(env)
 	int ret;
 	u_int i;
 
-	db_rep = env->rep_handle;
+	if ((db_rep = env->rep_handle) == NULL)
+		return (0);
 	ret = 0;
 
-	ret = __repmgr_stop(env);
+	/* Stop repmgr and all of its threads if it was previously started. */
+	if (IS_ENV_REPLICATED(env))
+		ret = __repmgr_stop(env);
+
+	/* Clean up sites array regardless of whether we could stop repmgr. */
 	if (db_rep->sites != NULL) {
 		for (i = 0; i < db_rep->site_cnt; i++) {
 			site = &db_rep->sites[i];
@@ -756,9 +959,9 @@ __repmgr_set_ack_policy(dbenv, policy)
 	DB_ENV *dbenv;
 	int policy;
 {
+	ENV *env;
 	DB_REP *db_rep;
 	DB_THREAD_INFO *ip;
-	ENV *env;
 	REP *rep;
 	int ret;
 
@@ -823,6 +1026,208 @@ __repmgr_get_ack_policy(dbenv, policy)
 }
 
 /*
+ * PUBLIC: int __repmgr_set_incoming_queue_max __P((DB_ENV *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ *
+ * Sets the maximum amount of dynamic memory (gbytes plus bytes) used by the
+ * Replication Manager incoming queue.  Zero for both sizes means unlimited.
+ */
+int
+__repmgr_set_incoming_queue_max(dbenv, gbytes, bytes)
+	DB_ENV *dbenv;
+	u_int32_t gbytes, bytes;
+{
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	REP *rep;
+
+	env = dbenv->env;
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	ENV_NOT_CONFIGURED(
+	    env, db_rep->region, "DB_ENV->repmgr_set_incoming_queue_max",
+	    DB_INIT_REP);
+
+	if (APP_IS_BASEAPI(env)) {
+		__db_errx(env, "%s %s",
+		    "DB_ENV->repmgr_set_incoming_queue_max:",
+		    "cannot call from base replication application");
+		return (EINVAL);
+	}
+
+	/*
+	 * If the caller provided 0 for both sizes, the size is unlimited.
+	 */
+	if (gbytes == 0 && bytes == 0) {
+		gbytes = UINT32_MAX;
+		bytes = GIGABYTE - 1;
+	}
+
+	while (bytes >= GIGABYTE) {	/* Normalize: bytes < 1GB. */
+		bytes -= GIGABYTE;
+		if (gbytes < UINT32_MAX)	/* Saturate; don't wrap. */
+			gbytes++;
+	}
+
+	if (REP_ON(env)) {
+		ENV_ENTER(env, ip);
+		MUTEX_LOCK(env, rep->mtx_repmgr);
+		rep->inqueue_max_gbytes = gbytes;
+		rep->inqueue_max_bytes = bytes;
+		__repmgr_set_incoming_queue_redzone(rep, gbytes, bytes);
+		MUTEX_UNLOCK(env, rep->mtx_repmgr);
+		ENV_LEAVE(env, ip);
+	} else {		/* Save the limits in the DB_REP handle. */
+		db_rep->inqueue_max_gbytes = gbytes;
+		db_rep->inqueue_max_bytes = bytes;
+	}
+
+	/*
+	 * Setting incoming queue maximum sizes makes this a replication
+	 * manager application.
+	 */
+	APP_SET_REPMGR(env);
+	return (0);
+}
+
+/*
+ * PUBLIC: int __repmgr_get_incoming_queue_max __P((DB_ENV *, u_int32_t *,
+ * PUBLIC:     u_int32_t *));
+ *
+ * Returns through gbytesp/bytesp the maximum amount of dynamic memory that
+ * can be used by the Replication Manager incoming queue.
+ */
+int
+__repmgr_get_incoming_queue_max(dbenv, gbytesp, bytesp)
+	DB_ENV *dbenv;
+	u_int32_t *gbytesp, *bytesp;
+{
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	DB_REP *db_rep;
+	REP *rep;
+
+	env = dbenv->env;
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	if (REP_ON(env)) {
+		ENV_ENTER(env, ip);
+		MUTEX_LOCK(env, rep->mtx_repmgr);
+		*gbytesp = rep->inqueue_max_gbytes;
+		*bytesp = rep->inqueue_max_bytes;
+		MUTEX_UNLOCK(env, rep->mtx_repmgr);
+		ENV_LEAVE(env, ip);
+	} else {		/* Use the limits saved in the DB_REP handle. */
+		*gbytesp = db_rep->inqueue_max_gbytes;
+		*bytesp = db_rep->inqueue_max_bytes;
+	}
+
+	return (0);
+}
+
+/*
+ * PUBLIC: void __repmgr_set_incoming_queue_redzone __P((void *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ *
+ * Sets the lower bound of the repmgr incoming queue red zone.
+ * !!! Assumes caller holds mtx_repmgr lock.
+ *
+ * Note that we can't simply get the REP* address from the env as we usually do,
+ * because at the time of this call it may not have been linked in there yet.
+ * Also note that REP is not a public structure, so we use "void *" here.
+ */
+void __repmgr_set_incoming_queue_redzone(rep_, gbytes, bytes)
+	void *rep_;
+	u_int32_t gbytes, bytes;
+{
+	REP *rep;
+	double rdgbytes, rdbytes;
+
+	rep = rep_;
+
+	/*
+	 * Scale the maximum by DB_REPMGR_INQUEUE_REDZONE_PERCENT, using
+	 * 'double' arithmetic for precision and to avoid overflow.
+	 */
+	rdgbytes = gbytes * 1.00 * DB_REPMGR_INQUEUE_REDZONE_PERCENT / 100.00;
+	rdbytes = (rdgbytes - (u_int32_t)rdgbytes) * GIGABYTE;
+	rdbytes += bytes * 1.00 * DB_REPMGR_INQUEUE_REDZONE_PERCENT / 100.00;
+	if (rdbytes >= GIGABYTE) {	/* Carry into gbytes. */
+		rdgbytes += 1;
+		rdbytes -= GIGABYTE;
+	}
+	rep->inqueue_rz_gbytes = (u_int32_t)rdgbytes;
+	rep->inqueue_rz_bytes = (u_int32_t)rdbytes;
+}
+
+/*
+ * PUBLIC: int __repmgr_get_incoming_queue_redzone __P((DB_ENV *,
+ * PUBLIC:     u_int32_t *, u_int32_t *));
+ *
+ * Gets the lower bound of the repmgr incoming queue red zone, read under
+ * mtx_repmgr.  This method must be called after environment open.
+ */
+int __repmgr_get_incoming_queue_redzone(dbenv, gbytesp, bytesp)
+	DB_ENV *dbenv;
+	u_int32_t *gbytesp, *bytesp;
+{
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	REP *rep;
+
+	env = dbenv->env;
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	ENV_REQUIRES_CONFIG(
+	    env, db_rep->region, "__repmgr_get_incoming_queue_redzone",
+	    DB_INIT_REP);
+
+	ENV_ENTER(env, ip);
+	MUTEX_LOCK(env, rep->mtx_repmgr);
+	*gbytesp = rep->inqueue_rz_gbytes;
+	*bytesp = rep->inqueue_rz_bytes;
+	MUTEX_UNLOCK(env, rep->mtx_repmgr);
+	ENV_LEAVE(env, ip);
+
+	return (0);
+}
+
+/*
+ * PUBLIC: int __repmgr_get_incoming_queue_fullevent __P((DB_ENV *,
+ * PUBLIC:     int *));
+ *
+ * Sets *onoffp to 1 if DB_EVENT_REP_INQUEUE_FULL event firing is turned on,
+ * or to 0 if it is turned off.
+ * This method must be called after environment open.
+ */
+int __repmgr_get_incoming_queue_fullevent(dbenv, onoffp)
+	DB_ENV *dbenv;
+	int *onoffp;
+{
+	DB_REP *db_rep;
+	ENV *env;
+	REP *rep;
+
+	env = dbenv->env;
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	ENV_REQUIRES_CONFIG(
+	    env, db_rep->region,
+	    "DB_ENV->__repmgr_get_incoming_queue_fullevent",
+	    DB_INIT_REP);
+
+	*onoffp = rep->inqueue_full_event_on ? 1 : 0;
+
+	return (0);
+}
+
+/*
  * PUBLIC: int __repmgr_env_create __P((ENV *, DB_REP *));
  */
 int
@@ -837,7 +1242,13 @@ __repmgr_env_create(env, db_rep)
 	db_rep->connection_retry_wait = DB_REPMGR_DEFAULT_CONNECTION_RETRY;
 	db_rep->election_retry_wait = DB_REPMGR_DEFAULT_ELECTION_RETRY;
 	db_rep->config_nsites = 0;
+	ADJUST_AUTOTAKEOVER_WAITS(db_rep, DB_REPMGR_DEFAULT_ACK_TIMEOUT);
 	db_rep->perm_policy = DB_REPMGR_ACKS_QUORUM;
+	db_rep->inqueue_max_gbytes = 0;
+	db_rep->inqueue_max_bytes = 0;
+#ifdef HAVE_REPLICATION_LISTENER_TAKEOVER
+	FLD_SET(db_rep->config, REP_C_AUTOTAKEOVER);
+#endif
 	FLD_SET(db_rep->config, REP_C_ELECTIONS);
 	FLD_SET(db_rep->config, REP_C_2SITE_STRICT);
 
@@ -846,7 +1257,8 @@ __repmgr_env_create(env, db_rep)
 	TAILQ_INIT(&db_rep->connections);
 	TAILQ_INIT(&db_rep->retries);
 
-	db_rep->input_queue.size = 0;
+	db_rep->input_queue.gbytes = 0;
+	db_rep->input_queue.bytes = 0;
 	STAILQ_INIT(&db_rep->input_queue.header);
 
 	__repmgr_env_create_pf(db_rep);
@@ -944,6 +1356,15 @@ __repmgr_await_threads(env)
 	 * of a connector thread.
 	 */
 
+	/* Takeover thread. */
+	if (db_rep->takeover_thread != NULL) {
+		if ((t_ret = __repmgr_thread_join(db_rep->takeover_thread)) !=
+		    0 && ret == 0)
+			ret = t_ret;
+		__os_free(env, db_rep->takeover_thread);
+		db_rep->takeover_thread = NULL;
+	}
+
 	/* Message processing threads. */
 	for (i = 0;
 	    i < db_rep->nthreads && db_rep->messengers[i] != NULL; i++) {
@@ -1178,7 +1599,7 @@ get_shared_netaddr(env, eid, netaddr)
 	MUTEX_LOCK(env, rep->mtx_repmgr);
 
 	if ((u_int)eid >= rep->site_cnt) {
-		ret = DB_NOTFOUND;
+		ret = USR_ERR(env, DB_NOTFOUND);
 		goto err;
 	}
 	DB_ASSERT(env, rep->siteinfo_off != INVALID_ROFF);
@@ -1423,7 +1844,7 @@ send_msg_self(env, iovecs, nmsg)
 	u_int32_t nmsg;
 {
 	REPMGR_MESSAGE *msg;
-	size_t align, bodysize, structsize;
+	size_t align, bodysize, msgsize, structsize;
 	u_int8_t *membase;
 	int ret;
 
@@ -1431,10 +1852,12 @@ send_msg_self(env, iovecs, nmsg)
 	bodysize = iovecs->total_bytes - __REPMGR_MSG_HDR_SIZE;
 	structsize = (size_t)DB_ALIGN((size_t)(sizeof(REPMGR_MESSAGE) +
 	    nmsg * sizeof(DBT)), align);
-	if ((ret = __os_malloc(env, structsize + bodysize, &membase)) != 0)
+	msgsize = structsize + bodysize;
+	if ((ret = __os_malloc(env, msgsize, &membase)) != 0)
 		return (ret);
 
 	msg = (void*)membase;
+	msg->size = msgsize;
 	membase += structsize;
 
 	/*
@@ -1616,13 +2039,14 @@ __repmgr_send_request(db_channel, request, nrequest, response, timeout, flags)
 	}
 
 	ENV_ENTER(env, ip);
-	ret = get_channel_connection(channel, &conn);
-	ENV_LEAVE(env, ip);
-	if (ret != 0)
-		return (ret);
+	if ((ret = get_channel_connection(channel, &conn)) != 0)
+		goto out;
 
-	if (conn == NULL)
-		return (request_self(env, request, nrequest, response, flags));
+	/* If conn is NULL, call request_self and then we are done here. */
+	if (conn == NULL) {
+		ret = request_self(env, request, nrequest, response, flags);
+		goto out;
+	}
 
 	/* Find an available array slot, or grow the array if necessary. */
 	LOCK_MUTEX(db_rep->mutex);
@@ -1670,7 +2094,7 @@ __repmgr_send_request(db_channel, request, nrequest, response, timeout, flags)
 		LOCK_MUTEX(db_rep->mutex);
 		F_CLR(&conn->responses[i], RESP_IN_USE | RESP_THREAD_WAITING);
 		UNLOCK_MUTEX(db_rep->mutex);
-		return (ret);
+		goto out;
 	}
 
 	timeout = timeout > 0 ? timeout : db_channel->timeout;
@@ -1688,7 +2112,7 @@ __repmgr_send_request(db_channel, request, nrequest, response, timeout, flags)
 		 * to wake up those threads, with a COMPLETE indication and an
 		 * error code.  That's more than we want to tackle here.
 		 */
-		return (ret);
+		goto out;
 	}
 
 	/*
@@ -1732,7 +2156,7 @@ __repmgr_send_request(db_channel, request, nrequest, response, timeout, flags)
 			sz = conn->iovecs.vectors[0].iov_len;
 
 			if ((ret = __os_malloc(env, sz, &dummy)) != 0)
-				goto out;
+				goto out_unlck;
 			__repmgr_iovec_init(&conn->iovecs);
 			DB_INIT_DBT(resp->dbt, dummy, sz);
 			__repmgr_add_dbt(&conn->iovecs, &resp->dbt);
@@ -1740,8 +2164,9 @@ __repmgr_send_request(db_channel, request, nrequest, response, timeout, flags)
 		}
 	}
 
-out:
+out_unlck:
 	UNLOCK_MUTEX(db_rep->mutex);
+out:	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -2168,6 +2593,7 @@ __repmgr_channel_close(dbchan, flags)
 {
 	ENV *env;
 	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
 	REPMGR_CONNECTION *conn;
 	CHANNEL *channel;
 	u_int32_t i;
@@ -2182,6 +2608,7 @@ __repmgr_channel_close(dbchan, flags)
 	 * Disable connection(s) (if not already done due to an error having
 	 * occurred previously); release our reference to conn struct(s).
 	 */
+	ENV_ENTER(env, ip);
 	LOCK_MUTEX(db_rep->mutex);
 	if (dbchan->eid >= 0) {
 		conn = channel->c.conn;
@@ -2218,6 +2645,7 @@ __repmgr_channel_close(dbchan, flags)
 	__os_free(env, channel);
 	__os_free(env, dbchan);
 
+	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -2369,29 +2797,26 @@ join_group_at_site(env, addrp)
 	repmgr_netaddr_t *addrp;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_CONNECTION *conn;
 	SITE_STRING_BUFFER addr_buf;
 	repmgr_netaddr_t addr, myaddr;
 	__repmgr_gm_fwd_args fwd;
 	__repmgr_site_info_args site_info;
+	__repmgr_v4site_info_args v4site_info;
 	u_int8_t *p, *response_buf, siteinfo_buf[MAX_MSG_BUF];
 	char host_buf[MAXHOSTNAMELEN + 1], *host;
 	u_int32_t gen, type;
-	size_t len;
+	size_t host_len, msg_len, req_len;
 	int ret, t_ret;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	LOCK_MUTEX(db_rep->mutex);
 	myaddr = SITE_FROM_EID(db_rep->self_eid)->net_addr;
 	UNLOCK_MUTEX(db_rep->mutex);
-	len = strlen(myaddr.host) + 1;
-	DB_INIT_DBT(site_info.host, myaddr.host, len);
-	site_info.port = myaddr.port;
-	site_info.flags = 0;
-	ret = __repmgr_site_info_marshal(env,
-	    &site_info, siteinfo_buf, sizeof(siteinfo_buf), &len);
-	DB_ASSERT(env, ret == 0);
+	host_len = strlen(myaddr.host) + 1;
 
 	conn = NULL;
 	response_buf = NULL;
@@ -2399,14 +2824,35 @@ join_group_at_site(env, addrp)
 	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "try join request to site %s",
 	    __repmgr_format_addr_loc(addrp, addr_buf)));
 retry:
-	if ((ret = make_request_conn(env, addrp, &conn)) != 0)
+	if ((ret = __repmgr_make_request_conn(env, addrp, &conn)) != 0)
 		return (ret);
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
+	if (conn->version < 5) {
+		DB_INIT_DBT(v4site_info.host, myaddr.host, host_len);
+		v4site_info.port = myaddr.port;
+		v4site_info.flags = 0;
+		ret = __repmgr_v4site_info_marshal(env,
+		    &v4site_info, siteinfo_buf, sizeof(siteinfo_buf), &req_len);
+	} else {
+		DB_INIT_DBT(site_info.host, myaddr.host, host_len);
+		site_info.port = myaddr.port;
+		site_info.status = 0;
+		site_info.flags = 0;
+		if (IS_VIEW_SITE(env))
+			FLD_SET(site_info.flags, SITE_VIEW);
+		if (rep->priority > 0)
+			FLD_SET(site_info.flags, SITE_JOIN_ELECTABLE);
+		ret = __repmgr_site_info_marshal(env,
+		    &site_info, siteinfo_buf, sizeof(siteinfo_buf), &req_len);
+	}
+	DB_ASSERT(env, ret == 0);
+	/* Preserve separate request length in case there is a retry. */
 	if ((ret = __repmgr_send_sync_msg(env, conn,
-	    REPMGR_JOIN_REQUEST, siteinfo_buf, (u_int32_t)len)) != 0)
+	    REPMGR_JOIN_REQUEST, siteinfo_buf, (u_int32_t)req_len)) != 0)
 		goto err;
 
-	if ((ret = read_own_msg(env,
-	    conn, &type, &response_buf, &len)) != 0)
+	if ((ret = __repmgr_read_own_msg(env,
+	    conn, &type, &response_buf, &msg_len)) != 0)
 		goto err;
 
 	if (type == REPMGR_GM_FAILURE) {
@@ -2429,7 +2875,7 @@ retry:
 			goto err;
 
 		ret = __repmgr_gm_fwd_unmarshal(env, &fwd,
-		    response_buf, len, &p);
+		    response_buf, msg_len, &p);
 		DB_ASSERT(env, ret == 0);
 		if (fwd.gen > gen) {
 			if (fwd.host.size > MAXHOSTNAMELEN + 1) {
@@ -2456,7 +2902,8 @@ retry:
 		}
 	}
 	if (type == REPMGR_JOIN_SUCCESS)
-		ret = __repmgr_refresh_membership(env, response_buf, len);
+		ret = __repmgr_refresh_membership(env, response_buf, msg_len,
+		    conn->version);
 	else
 		ret = DB_REP_UNAVAIL; /* Invalid response: protocol violation */
 
@@ -2476,129 +2923,6 @@ err:
 }
 
 /*
- * Reads a whole message, when we expect to get a REPMGR_OWN_MSG.
- */
-static int
-read_own_msg(env, conn, typep, bufp, lenp)
-	ENV *env;
-	REPMGR_CONNECTION *conn;
-	u_int32_t *typep;
-	u_int8_t **bufp;
-	size_t *lenp;
-{
-	__repmgr_msg_hdr_args msg_hdr;
-	u_int8_t *buf;
-	u_int32_t type;
-	size_t size;
-	int ret;
-
-	__repmgr_reset_for_reading(conn);
-	if ((ret = __repmgr_read_conn(conn)) != 0)
-		goto err;
-	ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr,
-	    conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL);
-	DB_ASSERT(env, ret == 0);
-
-	if ((conn->msg_type = msg_hdr.type) != REPMGR_OWN_MSG) {
-		ret = DB_REP_UNAVAIL; /* Protocol violation. */
-		goto err;
-	}
-	type = REPMGR_OWN_MSG_TYPE(msg_hdr);
-	if ((size = (size_t)REPMGR_OWN_BUF_SIZE(msg_hdr)) > 0) {
-		conn->reading_phase = DATA_PHASE;
-		__repmgr_iovec_init(&conn->iovecs);
-
-		if ((ret = __os_malloc(env, size, &buf)) != 0)
-			goto err;
-		conn->input.rep_message = NULL;
-
-		__repmgr_add_buffer(&conn->iovecs, buf, size);
-		if ((ret = __repmgr_read_conn(conn)) != 0) {
-			__os_free(env, buf);
-			goto err;
-		}
-		*bufp = buf;
-	}
-
-	*typep = type;
-	*lenp = size;
-
-err:
-	return (ret);
-}
-
-static int
-make_request_conn(env, addr, connp)
-	ENV *env;
-	repmgr_netaddr_t *addr;
-	REPMGR_CONNECTION **connp;
-{
-	DBT vi;
-	__repmgr_msg_hdr_args msg_hdr;
-	__repmgr_version_confirmation_args conf;
-	REPMGR_CONNECTION *conn;
-	int alloc, ret, unused;
-
-	alloc = FALSE;
-	if ((ret = __repmgr_connect(env, addr, &conn, &unused)) != 0)
-		return (ret);
-	conn->type = APP_CONNECTION;
-
-	/* Read a handshake msg, to get version confirmation and parameters. */
-	if ((ret = __repmgr_read_conn(conn)) != 0)
-		goto err;
-	/*
-	 * We can only get here after having read the full 9 bytes that we
-	 * expect, so this can't fail.
-	 */
-	DB_ASSERT(env, conn->reading_phase == SIZES_PHASE);
-	ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr,
-	    conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL);
-	DB_ASSERT(env, ret == 0);
-	__repmgr_iovec_init(&conn->iovecs);
-	conn->reading_phase = DATA_PHASE;
-
-	if ((ret = __repmgr_prepare_simple_input(env, conn, &msg_hdr)) != 0)
-		goto err;
-	alloc = TRUE;
-
-	if ((ret = __repmgr_read_conn(conn)) != 0)
-		goto err;
-
-	/*
-	 * Analyze the handshake msg, and stash relevant info.
-	 */
-	if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0)
-		goto err;
-	DB_ASSERT(env, vi.size > 0);
-	if ((ret = __repmgr_version_confirmation_unmarshal(env,
-	    &conf, vi.data, vi.size, NULL)) != 0)
-		goto err;
-
-	if (conf.version < GM_MIN_VERSION) {
-		ret = DB_REP_UNAVAIL;
-		goto err;
-	}
-	conn->version = conf.version;
-
-err:
-	if (alloc) {
-		DB_ASSERT(env, conn->input.repmgr_msg.cntrl.size > 0);
-		__os_free(env, conn->input.repmgr_msg.cntrl.data);
-		DB_ASSERT(env, conn->input.repmgr_msg.rec.size > 0);
-		__os_free(env, conn->input.repmgr_msg.rec.data);
-	}
-	__repmgr_reset_for_reading(conn);
-	if (ret == 0)
-		*connp = conn;
-	else {
-		(void)__repmgr_close_connection(env, conn);
-		(void)__repmgr_destroy_conn(env, conn);
-	}
-	return (ret);
-}
-
-/*
  * PUBLIC: int __repmgr_site __P((DB_ENV *,
  * PUBLIC:     const char *, u_int, DB_SITE **, u_int32_t));
  */
@@ -2640,9 +2964,9 @@ site_by_addr(env, host, port, sitep)
 	if ((ret = addr_chk(env, host, port)) != 0)
 		return (ret);
 
+	ENV_ENTER(env, ip);
 	if (REP_ON(env)) {
 		LOCK_MUTEX(db_rep->mutex);
-		ENV_ENTER(env, ip);
 		locked = TRUE;
 	} else
 		locked = FALSE;
@@ -2654,10 +2978,9 @@ site_by_addr(env, host, port, sitep)
 	 * we want the DB_SITE handle to point to; just like site_by_eid() does.
 	 */
 	host = site->net_addr.host;
-	if (locked) {
-		ENV_LEAVE(env, ip);
+	if (locked)
 		UNLOCK_MUTEX(db_rep->mutex);
-	}
+	ENV_LEAVE(env, ip);
 	if (ret != 0)
 		return (ret);
 
@@ -2723,7 +3046,7 @@ init_dbsite(env, eid, host, port, sitep)
 	dbsite->get_address = __repmgr_get_site_address;
 	dbsite->get_config = __repmgr_get_config;
 	dbsite->get_eid = __repmgr_get_eid;
-	dbsite->set_config = __repmgr_site_config;
+	dbsite->set_config = __repmgr_site_config_pp;
 	dbsite->remove = __repmgr_remove_site_pp;
 	dbsite->close = __repmgr_site_close;
 
@@ -2756,9 +3079,16 @@ __repmgr_get_eid(dbsite, eidp)
 	DB_SITE *dbsite;
 	int *eidp;
 {
+	DB_THREAD_INFO *ip;
+	ENV *env;
 	int ret;
 
-	if ((ret = refresh_site(dbsite)) != 0)
+	env = dbsite->env;
+
+	ENV_ENTER(env, ip);
+	ret = refresh_site(dbsite);
+	ENV_LEAVE(env, ip);
+	if (ret != 0)
 		return (ret);
 
 	if (F_ISSET(dbsite, DB_SITE_PREOPEN)) {
@@ -2791,8 +3121,11 @@ __repmgr_get_config(dbsite, which, valuep)
 	env = dbsite->env;
 	db_rep = env->rep_handle;
 
-	if ((ret = refresh_site(dbsite)) != 0)
+	ENV_ENTER(env, ip);
+	if ((ret = refresh_site(dbsite)) != 0) {
+		ENV_LEAVE(env, ip);
 		return (ret);
+	}
 	LOCK_MUTEX(db_rep->mutex);
 	DB_ASSERT(env, IS_VALID_EID(dbsite->eid));
 	site = SITE_FROM_EID(dbsite->eid);
@@ -2800,32 +3133,52 @@ __repmgr_get_config(dbsite, which, valuep)
 		rep = db_rep->region;
 		infop = env->reginfo;
 
-		ENV_ENTER(env, ip);
 		MUTEX_LOCK(env, rep->mtx_repmgr);
 		sites = R_ADDR(infop, rep->siteinfo_off);
 
 		site->config = sites[dbsite->eid].config;
 
 		MUTEX_UNLOCK(env, rep->mtx_repmgr);
-		ENV_LEAVE(env, ip);
 	}
 	*valuep = FLD_ISSET(site->config, which) ? 1 : 0;
 	UNLOCK_MUTEX(db_rep->mutex);
+	ENV_LEAVE(env, ip);
 	return (0);
 }
 
 /*
- * PUBLIC: int __repmgr_site_config __P((DB_SITE *, u_int32_t, u_int32_t));
+ * PUBLIC: int __repmgr_site_config_pp __P((DB_SITE *, u_int32_t, u_int32_t));
  */
 int
-__repmgr_site_config(dbsite, which, value)
+__repmgr_site_config_pp(dbsite, which, value)
 	DB_SITE *dbsite;
 	u_int32_t which;
 	u_int32_t value;
 {
-	DB_REP *db_rep;
 	DB_THREAD_INFO *ip;
 	ENV *env;
+	int ret;
+
+	env = dbsite->env;
+
+	ENV_ENTER(env, ip);
+	ret = __repmgr_site_config_int(dbsite, which, value);
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __repmgr_site_config_int __P((DB_SITE *, u_int32_t, u_int32_t));
+ */
+int
+__repmgr_site_config_int(dbsite, which, value)
+	DB_SITE *dbsite;
+	u_int32_t which;
+	u_int32_t value;
+{
+	DB_REP *db_rep;
+	ENV *env;
 	REGINFO *infop;
 	REP *rep;
 	REPMGR_SITE *site;
@@ -2875,7 +3228,6 @@ __repmgr_site_config(dbsite, which, value)
 		infop = env->reginfo;
 
 		LOCK_MUTEX(db_rep->mutex);
-		ENV_ENTER(env, ip);
 		MUTEX_LOCK(env, rep->mtx_repmgr);
 		sites = R_ADDR(infop, rep->siteinfo_off);
 		site = SITE_FROM_EID(dbsite->eid);
@@ -2896,7 +3248,6 @@ __repmgr_site_config(dbsite, which, value)
 			rep->siteinfo_seq++;
 		}
 		MUTEX_UNLOCK(env, rep->mtx_repmgr);
-		ENV_LEAVE(env, ip);
 		UNLOCK_MUTEX(db_rep->mutex);
 	} else {
 		site = SITE_FROM_EID(dbsite->eid);
@@ -2930,7 +3281,6 @@ set_local_site(dbsite, value)
 	if (REP_ON(env)) {
 		rep = db_rep->region;
 		LOCK_MUTEX(db_rep->mutex);
-		ENV_ENTER(env, ip);
 		MUTEX_LOCK(env, rep->mtx_repmgr);
 		locked = TRUE;
 		/* Make sure we're in sync first. */
@@ -2941,31 +3291,32 @@ set_local_site(dbsite, value)
 		__db_errx(env, DB_STR("3666",
 		    "A previously given local site may not be unset"));
 		ret = EINVAL;
-	} else if (IS_VALID_EID(db_rep->self_eid) &&
-	    db_rep->self_eid != dbsite->eid) {
-		__db_errx(env, DB_STR("3667",
-		    "A (different) local site has already been set"));
-		ret = EINVAL;
-	} else {
-		DB_ASSERT(env, IS_VALID_EID(dbsite->eid));
-		site = SITE_FROM_EID(dbsite->eid);
-		if (FLD_ISSET(site->config,
-		    DB_BOOTSTRAP_HELPER | DB_REPMGR_PEER)) {
-			__db_errx(env, DB_STR("3668",
-		    "Local site cannot have HELPER or PEER attributes"));
+	} else if (value) {
+		if (IS_VALID_EID(db_rep->self_eid) &&
+		    db_rep->self_eid != dbsite->eid) {
+			__db_errx(env, DB_STR("3697",
+			    "A (different) local site has already been set"));
 			ret = EINVAL;
+		} else {
+			DB_ASSERT(env, IS_VALID_EID(dbsite->eid));
+			site = SITE_FROM_EID(dbsite->eid);
+			if (FLD_ISSET(site->config,
+			    DB_BOOTSTRAP_HELPER | DB_REPMGR_PEER)) {
+				__db_errx(env, DB_STR("3698",
+			"Local site cannot have HELPER or PEER attributes"));
+				ret = EINVAL;
+			}
 		}
 	}
-	if (ret == 0) {
+	if (ret == 0 && value) {
 		db_rep->self_eid = dbsite->eid;
 		if (locked) {
-			rep->self_eid = dbsite->eid;
+			rep->self_eid = db_rep->self_eid;
 			rep->siteinfo_seq++;
 		}
 	}
 	if (locked) {
 		MUTEX_UNLOCK(env, rep->mtx_repmgr);
-		ENV_LEAVE(env, ip);
 		UNLOCK_MUTEX(db_rep->mutex);
 	}
 	return (ret);
@@ -2998,7 +3349,7 @@ refresh_site(dbsite)
 }
 
 static int
-__repmgr_remove_site_pp(dbsite)
+__repmgr_remove_and_close_site(dbsite)
 	DB_SITE *dbsite;
 {
 	int ret, t_ret;
@@ -3011,6 +3362,23 @@ __repmgr_remove_site_pp(dbsite)
 	 */
 	if ((t_ret = __repmgr_site_close(dbsite)) != 0 && ret == 0)
 		ret = t_ret;
+
+	return (ret);
+}
+
+static int
+__repmgr_remove_site_pp(dbsite)
+	DB_SITE *dbsite;
+{
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	int ret;
+
+	env = dbsite->env;
+
+	ENV_ENTER(env, ip);
+	ret = __repmgr_remove_and_close_site(dbsite);
+	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -3024,6 +3392,7 @@ __repmgr_remove_site(dbsite)
 	REPMGR_CONNECTION *conn;
 	repmgr_netaddr_t addr;
 	__repmgr_site_info_args site_info;
+	__repmgr_v4site_info_args v4site_info;
 	u_int8_t *response_buf, siteinfo_buf[MAX_MSG_BUF];
 	size_t len;
 	u_int32_t type;
@@ -3046,23 +3415,33 @@ __repmgr_remove_site(dbsite)
 	DB_ASSERT(env, IS_VALID_EID(master));
 	addr = SITE_FROM_EID(master)->net_addr;
 	UNLOCK_MUTEX(db_rep->mutex);
-
 	len = strlen(dbsite->host) + 1;
-	DB_INIT_DBT(site_info.host, dbsite->host, len);
-	site_info.port = dbsite->port;
-	site_info.flags = 0;
-	ret = __repmgr_site_info_marshal(env,
-	    &site_info, siteinfo_buf, sizeof(siteinfo_buf), &len);
-	DB_ASSERT(env, ret == 0);
 
 	conn = NULL;
 	response_buf = NULL;
-	if ((ret = make_request_conn(env, &addr, &conn)) != 0)
+	if ((ret = __repmgr_make_request_conn(env, &addr, &conn)) != 0)
 		return (ret);
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
+	if (conn->version < 5) {
+		DB_INIT_DBT(v4site_info.host, dbsite->host, len);
+		v4site_info.port = dbsite->port;
+		v4site_info.flags = 0;
+		ret = __repmgr_v4site_info_marshal(env,
+		    &v4site_info, siteinfo_buf, sizeof(siteinfo_buf), &len);
+	} else {
+		DB_INIT_DBT(site_info.host, dbsite->host, len);
+		site_info.port = dbsite->port;
+		site_info.status = 0;
+		site_info.flags = 0;
+		ret = __repmgr_site_info_marshal(env,
+		    &site_info, siteinfo_buf, sizeof(siteinfo_buf), &len);
+	}
+	DB_ASSERT(env, ret == 0);
+
 	if ((ret = __repmgr_send_sync_msg(env, conn,
 	    REPMGR_REMOVE_REQUEST, siteinfo_buf, (u_int32_t)len)) != 0)
 		goto err;
-	if ((ret = read_own_msg(env,
+	if ((ret = __repmgr_read_own_msg(env,
 	    conn, &type, &response_buf, &len)) != 0)
 		goto err;
 	ret = type == REPMGR_REMOVE_SUCCESS ? 0 : DB_REP_UNAVAIL;
@@ -3090,3 +3469,82 @@ __repmgr_site_close(dbsite)
 	__os_free(dbsite->env, dbsite);
 	return (0);
 }
+
+/*
+ * Demotes a participant site to a view.  This is a one-way and one-time
+ * operation.
+ *
+ * The demotion occurs at the very end of repmgr_start() because it
+ * requires a select thread to perform the gmdb operations that remove
+ * the site from the replication group and immediately add the site back
+ * into the group as a view.  The demotion also preserves any other threads
+ * created by repmgr_start() so that they are there to be used by the
+ * demoted site after it is re-added as a view site.
+ *
+ * We remove and re-add the site to propagate the site's change from
+ * participant to view to all sites in the replication group.  This includes
+ * updates to each site's gmdb and in-memory site list.
+ */
+#define	REPMGR_DEMOTION_MASTER_RETRIES	10	/* Polls for a master. */
+#define	REPMGR_DEMOTION_RETRY_USECS	500000	/* Wait between polls. */
+static int
+__repmgr_demote_site(env, eid)
+	ENV *env;
+	int eid;
+{
+	DB_REP *db_rep;
+	DB_SITE *dbsite;
+	REP *rep;
+	REPMGR_SITE *site;
+	int ret, t_ret, tries;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	site = SITE_FROM_EID(eid);
+	dbsite = NULL;
+
+	/* Inform other repmgr threads that a demotion is in progress. */
+	db_rep->demotion_pending = TRUE;
+
+	if ((ret = init_dbsite(env, eid, site->net_addr.host,
+	    site->net_addr.port, &dbsite)) != 0)
+		goto err;
+
+	/*
+	 * We need a master to perform the gmdb updates.  Poll for one, giving
+	 * up with DB_REP_UNAVAIL after REPMGR_DEMOTION_MASTER_RETRIES tries.
+	 */
+	tries = 0;
+	while (rep->master_id == DB_EID_INVALID) {
+		__os_yield(env, 0, REPMGR_DEMOTION_RETRY_USECS);
+		if (++tries >= REPMGR_DEMOTION_MASTER_RETRIES) {
+			ret = DB_REP_UNAVAIL;
+			goto err;
+		}
+	}
+
+	/* Remove site from replication group. */
+	if ((ret = __repmgr_remove_site(dbsite)) != 0)
+		goto err;
+
+	/*
+	 * Add site back into replication group as a view.  This demotion is
+	 * occurring because this site now has a view callback but its
+	 * SITE_VIEW flag is not set.  Now, __repmgr_join_group() will detect
+	 * the view callback and set the SITE_VIEW flag before sending this
+	 * site's information to the rest of the replication group.
+	 */
+	if ((ret = __repmgr_join_group(env)) != 0)
+		goto err;
+
+err:
+	/* Deallocates dbsite. */
+	if (dbsite != NULL) {
+		t_ret = __repmgr_site_close(dbsite);
+		if (ret == 0 && t_ret != 0)
+			ret = t_ret;
+	}
+	/* Must reset demotion_pending before leaving this routine. */
+	db_rep->demotion_pending = FALSE;
+	return (ret);
+}
diff --git a/src/repmgr/repmgr_msg.c b/src/repmgr/repmgr_msg.c
index 13537823..71cb2ada 100644
--- a/src/repmgr/repmgr_msg.c
+++ b/src/repmgr/repmgr_msg.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -15,15 +15,19 @@
 #include "dbinc_auto/repmgr_auto.h"
 
 static int dispatch_app_message __P((ENV *, REPMGR_MESSAGE *));
-static int finish_gmdb_update __P((ENV *,
-	DB_THREAD_INFO *, DBT *, u_int32_t, u_int32_t, __repmgr_member_args *));
+static int finish_gmdb_update __P((ENV *, DB_THREAD_INFO *,
+    DBT *, u_int32_t, u_int32_t, u_int32_t, __repmgr_member_args *));
 static int incr_gm_version __P((ENV *, DB_THREAD_INFO *, DB_TXN *));
-static void marshal_site_data __P((ENV *, u_int32_t, u_int8_t *, DBT *));
+static void marshal_site_data __P((ENV *,
+    u_int32_t, u_int32_t, u_int8_t *, DBT *));
 static void marshal_site_key __P((ENV *,
 	repmgr_netaddr_t *, u_int8_t *, DBT *, __repmgr_member_args *));
 static int message_loop __P((ENV *, REPMGR_RUNNABLE *));
+static int preferred_master_takeover __P((ENV*));
 static int process_message __P((ENV*, DBT*, DBT*, int));
 static int reject_fwd __P((ENV *, REPMGR_CONNECTION *));
+static int rejoin_connections(ENV *);
+static int rejoin_deferred_election(ENV *);
 static int rescind_pending __P((ENV *,
 	DB_THREAD_INFO *, int, u_int32_t, u_int32_t));
 static int resolve_limbo_int __P((ENV *, DB_THREAD_INFO *));
@@ -33,9 +37,13 @@ static int send_permlsn_conn __P((ENV *,
 	REPMGR_CONNECTION *, u_int32_t, DB_LSN *));
 static int serve_join_request __P((ENV *,
 	DB_THREAD_INFO *, REPMGR_MESSAGE *));
+static int serve_lsnhist_request __P((ENV *, DB_THREAD_INFO *,
+	REPMGR_MESSAGE *));
+static int serve_readonly_master_request __P((ENV *, REPMGR_MESSAGE *));
 static int serve_remove_request __P((ENV *,
 	DB_THREAD_INFO *, REPMGR_MESSAGE *));
 static int serve_repmgr_request __P((ENV *, REPMGR_MESSAGE *));
+static int serve_restart_client_request __P((ENV *, REPMGR_MESSAGE *));
 
 /*
  * Map one of the phase-1/provisional membership status values to its
@@ -72,6 +80,7 @@ message_loop(env, th)
 	REPMGR_RUNNABLE *th;
 {
 	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
 	REP *rep;
 	REPMGR_MESSAGE *msg;
 	REPMGR_CONNECTION *conn;
@@ -83,6 +92,7 @@ message_loop(env, th)
 	COMPQUIET(membership, 0);
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
+	ENV_ENTER(env, ip);
 	LOCK_MUTEX(db_rep->mutex);
 	while ((ret = __repmgr_queue_get(env, &msg, th)) == 0) {
 		incremented = FALSE;
@@ -141,7 +151,21 @@ message_loop(env, th)
 				 * detect it without the need for application
 				 * activity.
 				 */
-				ret = __rep_flush(env->dbenv);
+				ret = __rep_flush_int(env);
+			} else if (db_rep->prefmas_pending == master_switch &&
+			    IS_PREFMAS_MODE(env) &&
+			    FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER) &&
+			    F_ISSET(rep, REP_F_CLIENT)) {
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+"message_loop heartbeat preferred master switch"));
+				/*
+				 * We are a preferred master site currently
+				 * running as a client and we have finished
+				 * syncing with the temporary master.  It is
+				 * now time to take over as master.
+				 */
+				db_rep->prefmas_pending = no_action;
+				ret = preferred_master_takeover(env);
 			} else {
 				/*
 				 * Use heartbeat message to initiate rerequest
@@ -162,6 +186,12 @@ message_loop(env, th)
 			db_rep->non_rep_th--;
 		if (ret != 0)
 			goto out;
+		if (db_rep->view_mismatch) {
+			__db_errx(env, DB_STR("3699",
+    "Site is not recorded as a view in the group membership database"));
+			ret = EINVAL;
+			goto out;
+		}
 	}
 	/*
 	 * A return of DB_REP_UNAVAIL from __repmgr_queue_get() merely means we
@@ -171,6 +201,7 @@ message_loop(env, th)
 		ret = 0;
 out:
 	UNLOCK_MUTEX(db_rep->mutex);
+	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -341,16 +372,45 @@ process_message(env, control, rec, eid)
 		break;
 
 	case DB_REP_DUPMASTER:
-		/*
-		 * Initiate an election if we're configured to be using
-		 * elections, but only if we're *NOT* using leases.  When using
-		 * leases, there is never any uncertainty over which site is the
-		 * rightful master, and only the loser gets the DUPMASTER return
-		 * code.
-		 */
-		if ((ret = __repmgr_become_client(env)) == 0 &&
+		if (IS_PREFMAS_MODE(env) &&
+		    FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER)) {
+			/*
+			 * The preferred master site must restart as a master
+			 * so that it sends out a NEWMASTER to help the client
+			 * sync.  It must force a role change so that it
+			 * advances its gen even though it is already master.
+			 * This is needed if there was a temporary master at
+			 * a higher gen that is now restarting as a client.
+			 * A client won't process messages from a master at
+			 * a lower gen than its own.
+			 */
+			ret = __repmgr_repstart(env, DB_REP_MASTER,
+			    REP_START_FORCE_ROLECHG);
+		} else if (IS_PREFMAS_MODE(env) &&
+		    FLD_ISSET(rep->config, REP_C_PREFMAS_CLIENT) &&
+		    (ret = __repmgr_become_client(env)) == 0) {
+			/*
+			 * The preferred master client site must restart as
+			 * client without any elections to enable the preferred
+			 * master site to preserve its own transactions.  It
+			 * uses an election thread to repeatedly perform client
+			 * startups so that it will perform its client sync
+			 * when the preferred master's gen has caught up.
+			 */
+			LOCK_MUTEX(db_rep->mutex);
+			ret = __repmgr_init_election(env,
+			    ELECT_F_CLIENT_RESTART);
+			UNLOCK_MUTEX(db_rep->mutex);
+		} else if ((ret = __repmgr_become_client(env)) == 0 &&
 		    FLD_ISSET(rep->config, REP_C_LEASE | REP_C_ELECTIONS)
 		    == REP_C_ELECTIONS) {
+			/*
+			 * Initiate an election if we're configured to be using
+			 * elections, but only if we're *NOT* using leases.
+			 * When using leases, there is never any uncertainty
+			 * over which site is the rightful master, and only the
+			 * loser gets the DUPMASTER return code.
+			 */
 			LOCK_MUTEX(db_rep->mutex);
 			ret = __repmgr_init_election(env, ELECT_F_IMMED);
 			UNLOCK_MUTEX(db_rep->mutex);
@@ -406,6 +466,14 @@ DB_TEST_RECOVERY_LABEL
 		t_ret = __op_rep_exit(env);
 		if (ret == ENOENT)
 			ret = 0;
+		else if (ret == DB_DELETED && db_rep->demotion_pending)
+			/*
+			 * If a demotion is in progress, we want to keep
+			 * the repmgr threads instead of bowing out because
+			 * they are needed when we rejoin the replication group
+			 * immediately as a view.
+			 */
+			ret = 0;
 		else if (ret == DB_DELETED)
 			ret = __repmgr_bow_out(env);
 		if (t_ret != 0 && ret == 0)
@@ -428,8 +496,10 @@ __repmgr_handle_event(env, event, info)
 	void *info;
 {
 	DB_REP *db_rep;
+	REP *rep;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	if (db_rep->selector == NULL) {
 		/* Repmgr is not in use, so all events go to application. */
@@ -457,9 +527,46 @@ __repmgr_handle_event(env, event, info)
 
 		/* Application still needs to see this. */
 		break;
+	case DB_EVENT_REP_MASTER:
+	case DB_EVENT_REP_STARTUPDONE:
+		/*
+		 * Detect a rare case where a dupmaster or incomplete gmdb
+		 * operation has left the site's gmdb inconsistent with
+		 * a view callback definition.  The user would have correctly
+		 * defined a view callback and called repmgr_start(), but the
+		 * gmdb operation to update this site to a view would have been
+		 * incomplete or rolled back.  The site cannot operate in this
+		 * inconsistent state, so set an indicator to cause a message
+		 * thread to panic and terminate.
+		 *
+		 * The one exception is during a demotion to view, when
+		 * this inconsistency is expected for a short time.
+		 */
+		if (IS_VALID_EID(db_rep->self_eid) &&
+		    PARTICIPANT_TO_VIEW(db_rep,
+		    SITE_FROM_EID(db_rep->self_eid)) &&
+		    !db_rep->demotion_pending)
+			db_rep->view_mismatch = TRUE;
+
+		/*
+		 * In preferred master mode, when the preferred master site
+		 * finishes synchronizing with the temporary master it must
+		 * prepare to take over as master.  This is detected by the
+		 * next heartbeat in a message thread, where the takeover is
+		 * actually performed.
+		 */
+		if (event == DB_EVENT_REP_STARTUPDONE &&
+		    IS_PREFMAS_MODE(env) &&
+		    FLD_ISSET(rep->config, REP_C_PREFMAS_MASTER)) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "startupdone set preferred master switch"));
+			db_rep->prefmas_pending = master_switch;
+		}
+		break;
 	default:
 		break;
 	}
+	COMPQUIET(info, NULL);
 	return (DB_EVENT_NOT_HANDLED);
 }
 
@@ -504,7 +611,7 @@ send_permlsn(env, generation, lsn)
 		 */
 		policy = site->ack_policy > 0 ?
 		    site->ack_policy : rep->perm_policy;
-		if (policy == DB_REPMGR_ACKS_NONE ||
+		if (IS_VIEW_SITE(env) || policy == DB_REPMGR_ACKS_NONE ||
 		    (IS_PEER_POLICY(policy) && rep->priority == 0))
 			ack = FALSE;
 		else
@@ -614,26 +721,149 @@ send_permlsn_conn(env, conn, generation, lsn)
 	return (ret);
 }
 
+/*
+ * Perform the steps on the preferred master site to take over again as
+ * preferred master from a temporary master.  This routine should only be
+ * called after the preferred master has restarted as a client and finished
+ * a client sync with the temporary master.
+ *
+ * This routine makes a best effort to wait until all temporary master
+ * transactions have been applied on this site before taking over.
+ */
+static int
+preferred_master_takeover(env)
+	ENV *env;
+{
+	DB_LOG *dblp;
+	DB_REP *db_rep;
+	LOG *lp;
+	REP *rep;
+	DB_LSN last_ready_lsn, ready_lsn, sync_lsn;
+	u_long usec;
+	u_int32_t gen, max_tries, tries;
+	int ret, synced;
+
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	gen = 0;
+	ZERO_LSN(sync_lsn);
+	ret = 0;
+
+	if (!IS_PREFMAS_MODE(env))
+		return (ret);
+
+	/*
+	 * Start by making the temporary master a readonly master so that we
+	 * can know when we have applied all of its transactions on this
+	 * site before taking over.
+	 */
+	if ((ret = __repmgr_make_site_readonly_master(env,
+	    1, &gen, &sync_lsn)) != 0)
+		return (ret);
+	DB_ASSERT(env, gen >= rep->gen);
+
+	/*
+	 * Make a best effort to wait until this site has all transactions
+	 * from the temporary master.  We want to preserve temporary master
+	 * transactions, but we can't wait forever.  If we exceed our wait,
+	 * we restart this site as preferred master anyway.  This may
+	 * sacrifice some temporary master transactions in order to preserve
+	 * repgroup write availability.
+	 *
+	 * We restart the number of tries each time we make progress in
+	 * transactions applied, until either we apply through sync_lsn or
+	 * we exceed max_tries without progress.
+	 */
+	if ((ret = __repmgr_prefmas_get_wait(env, &max_tries, &usec)) != 0)
+		return (ret);
+	tries = 0;
+	synced = 0;
+	ZERO_LSN(ready_lsn);
+	ZERO_LSN(last_ready_lsn);
+	while (!synced && tries < max_tries) {
+		__os_yield(env, 0, usec);
+		tries++;
+		/*
+		 * lp->ready_lsn is the next LSN we expect to receive,
+		 * which also indicates how much we've applied.  sync_lsn
+		 * is the lp->lsn (indicating the next log record expected)
+		 * from the other site.
+		 */
+		MUTEX_LOCK(env, rep->mtx_clientdb);
+		ready_lsn = lp->ready_lsn;
+		MUTEX_UNLOCK(env, rep->mtx_clientdb);
+		if (gen == rep->gen && LOG_COMPARE(&ready_lsn, &sync_lsn) >= 0)
+			synced = 1;
+		else if (LOG_COMPARE(&ready_lsn, &last_ready_lsn) > 0) {
+			/* Strictly newer LSN: real progress, restart tries. */
+			last_ready_lsn = ready_lsn;
+			tries = 0;
+		}
+	}
+
+	/* Restart the remote readonly temporary master as a client. */
+	if ((ret = __repmgr_restart_site_as_client(env, 1)) != 0)
+		return (ret);
+
+	/* Restart this site as the preferred master, waiting for
+	 * REP_LOCKOUT_MSG.  The NEWCLIENT message sent back from
+	 * restarting the other site as client can briefly lock
+	 * REP_LOCKOUT_MSG to do some cleanup.  We don't want this
+	 * to cause the rep_start_int() call to restart this site
+	 * as master to return 0 without doing anything.
+	 */
+	ret = __repmgr_become_master(env, REP_START_WAIT_LOCKMSG);
+	return (ret);
+}
+
 static int
 serve_repmgr_request(env, msg)
 	ENV *env;
 	REPMGR_MESSAGE *msg;
 {
-	DB_THREAD_INFO *ip;
+	DB_REP *db_rep;
 	DBT *dbt;
+	DB_THREAD_INFO *ip;
 	REPMGR_CONNECTION *conn;
+	u_int32_t mtype;
 	int ret, t_ret;
 
-	ENV_ENTER(env, ip);
-	switch (REPMGR_OWN_MSG_TYPE(msg->msg_hdr)) {
+	db_rep = env->rep_handle;
+	ENV_GET_THREAD_INFO(env, ip);
+	conn = msg->v.gmdb_msg.conn;
+	mtype = REPMGR_OWN_MSG_TYPE(msg->msg_hdr);
+	switch (mtype) {
 	case REPMGR_JOIN_REQUEST:
 		ret = serve_join_request(env, ip, msg);
 		break;
+	case REPMGR_LSNHIST_REQUEST:
+		ret = serve_lsnhist_request(env, ip, msg);
+		break;
+	case REPMGR_READONLY_MASTER:
+		ret = serve_readonly_master_request(env, msg);
+		break;
 	case REPMGR_REJOIN:
 		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 		    "One try at rejoining group automatically"));
 		if ((ret = __repmgr_join_group(env)) == DB_REP_UNAVAIL)
 			ret = __repmgr_bow_out(env);
+		else if (ret == 0 && IS_PREFMAS_MODE(env)) {
+			/*
+			 * For preferred master mode, we need to get
+			 * a "regular" connection to the other site without
+			 * calling an election prematurely here.
+			 */
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "Establishing connections after rejoin"));
+			ret = rejoin_connections(env);
+		} else if (ret == 0 && db_rep->rejoin_pending) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "Calling deferred election after rejoin"));
+			ret = rejoin_deferred_election(env);
+		}
+		db_rep->rejoin_pending = FALSE;
 		break;
 	case REPMGR_REMOVE_REQUEST:
 		ret = serve_remove_request(env, ip, msg);
@@ -641,23 +871,32 @@ serve_repmgr_request(env, msg)
 	case REPMGR_RESOLVE_LIMBO:
 		ret = resolve_limbo_wrapper(env, ip);
 		break;
+	case REPMGR_RESTART_CLIENT:
+		ret = serve_restart_client_request(env, msg);
+		break;
 	case REPMGR_SHARING:
 		dbt = &msg->v.gmdb_msg.request;
-		ret = __repmgr_refresh_membership(env, dbt->data, dbt->size);
+		ret = __repmgr_refresh_membership(env, dbt->data, dbt->size,
+		    (conn == NULL ? DB_REPMGR_VERSION : conn->version));
 		break;
 	default:
 		ret = __db_unknown_path(env, "serve_repmgr_request");
 		break;
 	}
-	if ((conn = msg->v.gmdb_msg.conn) != NULL) {
+	if (conn != NULL) {
+		/*
+		 * A site that removed itself may have already closed its
+		 * connections.  Do not return an error and panic if we
+		 * can't close the one-shot GMDB connection for a remove
+		 * request here.
+		 */
 		if ((t_ret = __repmgr_close_connection(env, conn)) != 0 &&
-		    ret == 0)
+		    ret == 0 && mtype != REPMGR_REMOVE_REQUEST)
 			ret = t_ret;
 		if ((t_ret = __repmgr_decr_conn_ref(env, conn)) != 0 &&
 		    ret == 0)
 			ret = t_ret;
 	}
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -674,8 +913,10 @@ serve_join_request(env, ip, msg)
 {
 	DB_REP *db_rep;
 	REPMGR_CONNECTION *conn;
+	REPMGR_SITE *site;
 	DBT *dbt;
 	__repmgr_site_info_args site_info;
+	__repmgr_v4site_info_args v4site_info;
 	u_int8_t *buf;
 	char *host;
 	size_t len;
@@ -686,9 +927,18 @@ serve_join_request(env, ip, msg)
 	COMPQUIET(status, 0);
 
 	conn = msg->v.gmdb_msg.conn;
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
 	dbt = &msg->v.gmdb_msg.request;
-	ret = __repmgr_site_info_unmarshal(env,
-	    &site_info, dbt->data, dbt->size, NULL);
+	if (conn->version < 5) {
+		ret = __repmgr_v4site_info_unmarshal(env,
+		    &v4site_info, dbt->data, dbt->size, NULL);
+		site_info.host = v4site_info.host;
+		site_info.port = v4site_info.port;
+		site_info.status = v4site_info.flags;
+		site_info.flags = 0;
+	} else
+		ret = __repmgr_site_info_unmarshal(env,
+		    &site_info, dbt->data, dbt->size, NULL);
 
 	host = site_info.host.data;
 	host[site_info.host.size - 1] = '\0';
@@ -703,7 +953,23 @@ serve_join_request(env, ip, msg)
 	LOCK_MUTEX(db_rep->mutex);
 	if ((ret = __repmgr_find_site(env, host, site_info.port, &eid)) == 0) {
 		DB_ASSERT(env, eid != db_rep->self_eid);
-		status = SITE_FROM_EID(eid)->membership;
+		site = SITE_FROM_EID(eid);
+		status = site->membership;
+		/*
+		 * Remote site electability is usually exchanged when
+		 * a connection is established, but when a new site
+		 * joins the repgroup there is a brief gap between the
+		 * join and the connection.  Record electability for
+		 * the joining site so that we are not overly conservative
+		 * about the number of acks we require for a PERM
+		 * transaction if the joining site is unelectable.
+		 */
+		if (FLD_ISSET(site_info.flags, SITE_JOIN_ELECTABLE)) {
+			F_SET(site, SITE_ELECTABLE);
+			FLD_CLR(site_info.flags, SITE_JOIN_ELECTABLE);
+		} else
+			F_CLR(site, SITE_ELECTABLE);
+		F_SET(site, SITE_HAS_PRIO);
 	}
 	UNLOCK_MUTEX(db_rep->mutex);
 	if (ret != 0)
@@ -712,7 +978,8 @@ serve_join_request(env, ip, msg)
 	switch (status) {
 	case 0:
 	case SITE_ADDING:
-		ret = __repmgr_update_membership(env, ip, eid, SITE_ADDING);
+		ret = __repmgr_update_membership(env, ip, eid, SITE_ADDING,
+			site_info.flags);
 		break;
 	case SITE_PRESENT:
 		/* Already in desired state. */
@@ -729,7 +996,7 @@ serve_join_request(env, ip, msg)
 		goto err;
 
 	LOCK_MUTEX(db_rep->mutex);
-	ret = __repmgr_marshal_member_list(env, &buf, &len);
+	ret = __repmgr_marshal_member_list(env, conn->version, &buf, &len);
 	UNLOCK_MUTEX(db_rep->mutex);
 	if (ret != 0)
 		goto err;
@@ -760,6 +1027,7 @@ serve_remove_request(env, ip, msg)
 	REPMGR_SITE *site;
 	DBT *dbt;
 	__repmgr_site_info_args site_info;
+	__repmgr_v4site_info_args v4site_info;
 	char *host;
 	u_int32_t status, type;
 	int eid, ret, t_ret;
@@ -768,9 +1036,18 @@ serve_remove_request(env, ip, msg)
 	db_rep = env->rep_handle;
 
 	conn = msg->v.gmdb_msg.conn;
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
 	dbt = &msg->v.gmdb_msg.request;
-	ret = __repmgr_site_info_unmarshal(env,
-	    &site_info, dbt->data, dbt->size, NULL);
+	if (conn->version < 5) {
+		ret = __repmgr_v4site_info_unmarshal(env,
+		    &v4site_info, dbt->data, dbt->size, NULL);
+		site_info.host = v4site_info.host;
+		site_info.port = v4site_info.port;
+		site_info.status = v4site_info.flags;
+		site_info.flags = 0;
+	} else
+		ret = __repmgr_site_info_unmarshal(env,
+		    &site_info, dbt->data, dbt->size, NULL);
 
 	host = site_info.host.data;
 	host[site_info.host.size - 1] = '\0';
@@ -810,7 +1087,8 @@ serve_remove_request(env, ip, msg)
 		break;
 	case SITE_PRESENT:
 	case SITE_DELETING:
-		ret = __repmgr_update_membership(env, ip, eid, SITE_DELETING);
+		ret = __repmgr_update_membership(env, ip, eid, SITE_DELETING,
+			site_info.flags);
 		break;
 	default:
 		ret = __db_unknown_path(env, "serve_remove_request");
@@ -829,7 +1107,175 @@ err:
 	default:
 		return (ret);
 	}
-	return (__repmgr_send_sync_msg(env, conn, type, NULL, 0));
+	/*
+	 * It is possible when a site removes itself that by now it has
+	 * already acted on the first GMDB update and closed its connections.
+	 * Do not return an error and panic if we can't send the final
+	 * status of the remove operation.
+	 */
+	if ((ret = __repmgr_send_sync_msg(env, conn, type, NULL, 0)) != 0)
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "Problem sending remove site status message %d", ret));
+	return (0);
+}
+
+/*
+ * Serve the REPMGR_RESTART_CLIENT message by restarting this site as a
+ * client if it is not already a client.  Always sends back a
+ * REPMGR_PREFMAS_SUCCESS message with an empty payload.
+ */
+static int
+serve_restart_client_request(env, msg)
+	ENV *env;
+	REPMGR_MESSAGE *msg;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	REPMGR_CONNECTION *conn;
+	int ret, t_ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	ret = 0;
+
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "Serving restart_client request"));
+	conn = msg->v.gmdb_msg.conn;
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
+	/* No need to read payload - it is just a dummy byte. */
+
+	if (IS_PREFMAS_MODE(env) && !F_ISSET(rep, REP_F_CLIENT))
+		ret = __repmgr_become_client(env);
+
+	if ((t_ret = __repmgr_send_sync_msg(env, conn,
+	    REPMGR_PREFMAS_SUCCESS, NULL, 0)) != 0)
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "Problem sending restart client success message %d",
+		    t_ret));
+	if (ret == 0 && t_ret != 0)
+		ret = t_ret;
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "Request for restart_client returning %d", ret));
+	return (ret);
+}
+
+/*
+ * Serve the REPMGR_READONLY_MASTER message by turning this site into a
+ * readonly master.  Always sends back a REPMGR_READONLY_RESPONSE message with
+ * a payload containing this site's gen and next LSN expected.  If there are
+ * any errors, the gen is 0 and the next LSN is [0,0].
+ */
+static int
+serve_readonly_master_request(env, msg)
+	ENV *env;
+	REPMGR_MESSAGE *msg;
+{
+	REPMGR_CONNECTION *conn;
+	__repmgr_permlsn_args permlsn;
+	u_int8_t buf[__REPMGR_PERMLSN_SIZE];
+	int ret, t_ret;
+
+	ret = 0;
+	permlsn.generation = 0;		/* Reply if not in prefmas mode. */
+	ZERO_LSN(permlsn.lsn);
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "Serving readonly_master request"));
+	conn = msg->v.gmdb_msg.conn;
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
+	/* No need to read payload - it is just a dummy byte. */
+	if (IS_PREFMAS_MODE(env))
+		ret = __rep_become_readonly_master(env,
+		    &permlsn.generation, &permlsn.lsn);
+
+	__repmgr_permlsn_marshal(env, &permlsn, buf);
+	if ((t_ret = __repmgr_send_sync_msg(env, conn,
+	    REPMGR_READONLY_RESPONSE, buf, __REPMGR_PERMLSN_SIZE)) != 0)
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "Problem sending readonly response message %d", t_ret));
+	if (ret == 0 && t_ret != 0)
+		ret = t_ret;
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "Request for readonly_master returning %d", ret));
+	return (ret);
+}
+
+/*
+ * Serve the REPMGR_LSNHIST_REQUEST message by retrieving information from
+ * this site's LSN history database for the requested gen.  If the requested
+ * gen exists at this site, sends back a REPMGR_LSNHIST_RESPONSE message
+ * containing the LSN and timestamp at the requested gen and the LSN for the
+ * next gen if that gen exists (next gen LSN is [0,0] if next gen doesn't
+ * yet exist at this site.)  Sends back a PREFMAS_FAILURE message if the
+ * requested gen does not yet exist at this site or if there are any errors.
+ */
+static int
+serve_lsnhist_request(env, ip, msg)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	REPMGR_MESSAGE *msg;
+{
+	REPMGR_CONNECTION *conn;
+	DBT *dbt;
+	__repmgr_lsnhist_match_args lsnhist_match;
+	__rep_lsn_hist_data_args lsnhist_data, next_lsnhist_data;
+	__rep_lsn_hist_key_args key;
+	u_int8_t match_buf[__REPMGR_LSNHIST_MATCH_SIZE];
+	DB_LSN next_gen_lsn;
+	int ret, t_ret;
+
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "Serving lsnhist request"));
+	conn = msg->v.gmdb_msg.conn;
+	DB_ASSERT(env, conn->version > 0 && conn->version <= DB_REPMGR_VERSION);
+	/* Read lsn_hist_key incoming payload to get gen being requested. */
+	dbt = &msg->v.gmdb_msg.request;
+	if ((ret = __rep_lsn_hist_key_unmarshal(env,
+	    &key, dbt->data, dbt->size, NULL)) != 0)
+		return (ret);
+	if (key.version != REP_LSN_HISTORY_FMT_VERSION) {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "serve_lsnhist_request version mismatch"));
+		return (0);
+	}
+
+	/*
+	 * There's no need to retry if we don't find an lsnhist record for
+	 * requested gen.  This site is either a temporary master or a client,
+	 * which means that if it doesn't already have an lsnhist record at
+	 * this gen, it is highly unlikely to get one in the near future.
+	 */
+	if ((ret = __rep_get_lsnhist_data(env,
+	    ip, key.gen, &lsnhist_data)) == 0) {
+
+		if ((t_ret = __rep_get_lsnhist_data(env,
+		    ip, key.gen + 1, &next_lsnhist_data)) == 0)
+			next_gen_lsn = next_lsnhist_data.lsn;
+		else
+			ZERO_LSN(next_gen_lsn);
+
+		lsnhist_match.lsn = lsnhist_data.lsn;
+		lsnhist_match.hist_sec = lsnhist_data.hist_sec;
+		lsnhist_match.hist_nsec = lsnhist_data.hist_nsec;
+		lsnhist_match.next_gen_lsn = next_gen_lsn;
+		__repmgr_lsnhist_match_marshal(env, &lsnhist_match, match_buf);
+		if ((t_ret = __repmgr_send_sync_msg(env, conn,
+		    REPMGR_LSNHIST_RESPONSE, match_buf,
+		    __REPMGR_LSNHIST_MATCH_SIZE)) != 0)
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "Problem sending lsnhist response message %d",
+			    t_ret));
+	} else if ((t_ret = __repmgr_send_sync_msg(env, conn,
+	    REPMGR_PREFMAS_FAILURE, NULL, 0)) != 0)
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "Problem sending prefmas failure message %d", t_ret));
+
+	/* Do not return an error if LSN history record not found. */
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+	if (ret == 0 && t_ret != 0)
+		ret = t_ret;
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "Request for lsnhist returning %d", ret));
+	return (ret);
+}
 }
 
 /*
@@ -917,7 +1363,13 @@ resolve_limbo_int(env, ip)
 	if (orig_status == SITE_PRESENT || orig_status == 0)
 		goto out;
 
-	if (IS_ZERO_LSN(db_rep->limbo_failure))
+	/*
+	 * It is possible after an autotakeover on a master to have no
+	 * limbo_failure LSN but to have a limbo_victim that was found
+	 * in the gmdb that still needs to be resolved.
+	 */
+	if (IS_ZERO_LSN(db_rep->limbo_failure) &&
+	    !db_rep->limbo_resolution_needed)
 		goto out;
 
 	/*
@@ -947,7 +1399,8 @@ resolve_limbo_int(env, ip)
 		    ip, NULL, &txn, DB_IGNORE_LEASE)) != 0)
 			goto out;
 
-		marshal_site_data(env, orig_status, data_buf, &data_dbt);
+		marshal_site_data(env,
+		    orig_status, site->gmdb_flags, data_buf, &data_dbt);
 
 		ret = __db_put(db_rep->gmdb, ip, txn, &key_dbt, &data_dbt, 0);
 		if ((t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) != 0 &&
@@ -980,15 +1433,15 @@ resolve_limbo_int(env, ip)
 	UNLOCK_MUTEX(db_rep->mutex);
 	locked = FALSE;
 	status = NEXT_STATUS(orig_status);
-	if ((ret = finish_gmdb_update(env,
-	    ip, &key_dbt, orig_status, status, &logrec)) != 0)
+	if ((ret = finish_gmdb_update(env, ip,
+	    &key_dbt, orig_status, status, site->gmdb_flags, &logrec)) != 0)
 		goto out;
 
 	/* Track modified membership status in our in-memory sites array. */
 	LOCK_MUTEX(db_rep->mutex);
 	locked = TRUE;
 	if ((ret = __repmgr_set_membership(env,
-	    addr.host, addr.port, status)) != 0)
+	    addr.host, addr.port, status, site->gmdb_flags)) != 0)
 		goto out;
 	__repmgr_set_sites(env);
 
@@ -1005,14 +1458,15 @@ out:
  * status is inferred (ADDING -> PRESENT, or DELETING -> 0).
  *
  * PUBLIC: int __repmgr_update_membership __P((ENV *,
- * PUBLIC:     DB_THREAD_INFO *, int, u_int32_t));
+ * PUBLIC:     DB_THREAD_INFO *, int, u_int32_t, u_int32_t));
  */
 int
-__repmgr_update_membership(env, ip, eid, pstatus)
+__repmgr_update_membership(env, ip, eid, pstatus, site_flags)
 	ENV *env;
 	DB_THREAD_INFO *ip;
 	int eid;
 	u_int32_t pstatus;	/* Provisional status. */
+	u_int32_t site_flags;
 {
 	DB_REP *db_rep;
 	REPMGR_SITE *site;
@@ -1092,7 +1546,7 @@ retry:
 	 * those seem even more confusing.
 	 */
 	if ((ret = __repmgr_set_membership(env,
-	    addr.host, addr.port, pstatus)) != 0)
+	    addr.host, addr.port, pstatus, site_flags)) != 0)
 		goto err;
 	__repmgr_set_sites(env);
 
@@ -1108,7 +1562,7 @@ retry:
 	if ((ret = __txn_begin(env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0)
 		goto err;
 	marshal_site_key(env, &addr, key_buf, &key_dbt, &logrec);
-	marshal_site_data(env, pstatus, status_buf, &data_dbt);
+	marshal_site_data(env, pstatus, site_flags, status_buf, &data_dbt);
 	if ((ret = __db_put(db_rep->gmdb,
 	    ip, txn, &key_dbt, &data_dbt, 0)) != 0)
 		goto err;
@@ -1152,13 +1606,14 @@ retry:
 	locked = FALSE;
 
 	if ((ret = finish_gmdb_update(env, ip,
-	    &key_dbt, pstatus, ult_status, &logrec)) != 0)
+	    &key_dbt, pstatus, ult_status, site_flags, &logrec)) != 0)
 		goto err;
 
 	/* Track modified membership status in our in-memory sites array. */
 	LOCK_MUTEX(db_rep->mutex);
 	locked = TRUE;
-	ret = __repmgr_set_membership(env, addr.host, addr.port, ult_status);
+	ret = __repmgr_set_membership(env, addr.host, addr.port,
+	    ult_status, site_flags);
 	__repmgr_set_sites(env);
 
 err:
@@ -1173,7 +1628,7 @@ err:
 		 * that we keep in sync.
 		 */
 		(void)__repmgr_set_membership(env,
-		    addr.host, addr.port, orig_status);
+		    addr.host, addr.port, orig_status, site_flags);
 	}
 	if ((t_ret = __repmgr_cleanup_gmdb_op(env, do_close)) != 0 &&
 	    ret == 0)
@@ -1215,13 +1670,14 @@ retry:
 	UNLOCK_MUTEX(db_rep->mutex);
 
 	marshal_site_key(env, &addr, key_buf, &key_dbt, &logrec);
-	if ((ret = finish_gmdb_update(env,
-	    ip, &key_dbt, cur_status, new_status, &logrec)) != 0)
+	if ((ret = finish_gmdb_update(env, ip,
+	    &key_dbt, cur_status, new_status, site->gmdb_flags, &logrec)) != 0)
 		goto err;
 
 	/* Track modified membership status in our in-memory sites array. */
 	LOCK_MUTEX(db_rep->mutex);
-	ret = __repmgr_set_membership(env, addr.host, addr.port, new_status);
+	ret = __repmgr_set_membership(env, addr.host, addr.port,
+	    new_status, site->gmdb_flags);
 	__repmgr_set_sites(env);
 	UNLOCK_MUTEX(db_rep->mutex);
 
@@ -1301,11 +1757,11 @@ __repmgr_set_gm_version(env, ip, txn, version)
  * really deleted.
  */
 static int
-finish_gmdb_update(env, ip, key_dbt, prev_status, status, logrec)
+finish_gmdb_update(env, ip, key_dbt, prev_status, status, flags, logrec)
 	ENV *env;
 	DB_THREAD_INFO *ip;
 	DBT *key_dbt;
-	u_int32_t prev_status, status;
+	u_int32_t prev_status, status, flags;
 	__repmgr_member_args *logrec;
 {
 	DB_REP *db_rep;
@@ -1324,7 +1780,7 @@ finish_gmdb_update(env, ip, key_dbt, prev_status, status, logrec)
 	if (status == 0)
 		ret = __db_del(db_rep->gmdb, ip, txn, key_dbt, 0);
 	else {
-		marshal_site_data(env, status, data_buf, &data_dbt);
+		marshal_site_data(env, status, flags, data_buf, &data_dbt);
 		ret = __db_put(db_rep->gmdb, ip, txn, key_dbt, &data_dbt, 0);
 	}
 	if (ret != 0)
@@ -1617,16 +2073,18 @@ marshal_site_key(env, addr, buf, dbt, logrec)
 }
 
 static void
-marshal_site_data(env, status, buf, dbt)
+marshal_site_data(env, status, flags, buf, dbt)
 	ENV *env;
 	u_int32_t status;
+	u_int32_t flags;
 	u_int8_t *buf;
 	DBT *dbt;
 {
-	__repmgr_membership_data_args member_status;
+	__repmgr_membership_data_args member_data;
 
-	member_status.flags = status;
-	__repmgr_membership_data_marshal(env, &member_status, buf);
+	member_data.status = status;
+	member_data.flags = flags;
+	__repmgr_membership_data_marshal(env, &member_data, buf);
 	DB_INIT_DBT(*dbt, buf, __REPMGR_MEMBERSHIP_DATA_SIZE);
 }
 
@@ -1640,16 +2098,107 @@ __repmgr_set_sites(env)
 	ENV *env;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	int ret;
 	u_int32_t n;
 	u_int i;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	for (i = 0, n = 0; i < db_rep->site_cnt; i++) {
-		if (db_rep->sites[i].membership > 0)
+		/*
+		 * Views do not count towards nsites because they cannot
+		 * vote in elections, become master or contribute to
+		 * durability.
+		 */
+		if (db_rep->sites[i].membership > 0 &&
+		    !FLD_ISSET(db_rep->sites[i].gmdb_flags, SITE_VIEW))
 			n++;
 	}
 	ret = __rep_set_nsites_int(env, n);
 	DB_ASSERT(env, ret == 0);
+	if (FLD_ISSET(rep->config,
+	    REP_C_PREFMAS_MASTER | REP_C_PREFMAS_CLIENT) &&
+	    rep->config_nsites > 2)
+		__db_errx(env, DB_STR("3701",
+	    "More than two sites in preferred master replication group"));
+}
+
+/*
+ * If a site is rejoining a 2-site repgroup with 2SITE_STRICT off
+ * and has a rejection because it needs to catch up with the latest
+ * group membership database, it cannot call an election right away
+ * because it would win with only its own vote and ignore an existing
+ * master in the repgroup.  Instead, this routine is used to call the
+ * deferred election after the site has rejoined the repgroup successfully.
+ */
+static int
+rejoin_deferred_election(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	u_int32_t flags;
+	int eid, ret;
+
+	db_rep = env->rep_handle;
+	LOCK_MUTEX(db_rep->mutex);
+
+	/*
+	 * First, retry all remote connections (normally just one site, but
+	 * membership may have changed while we were gone) so the election
+	 * can communicate.  A scheduling failure ends the loop early, but
+	 * its return code is replaced by the election result below.
+	 */
+	FOR_EACH_REMOTE_SITE_INDEX(eid) {
+		if ((ret =
+		    __repmgr_schedule_connection_attempt(env, eid, TRUE)) != 0)
+			break;
+	}
+
+	/*
+	 * Call an immediate, but not a fast, election because a fast
+	 * election reduces the number of votes needed by 1.
+	 */
+	flags = ELECT_F_EVENT_NOTIFY;
+	if (FLD_ISSET(db_rep->region->config, REP_C_ELECTIONS))
+		LF_SET(ELECT_F_IMMED);
+	else
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "Deferred rejoin election, but no elections"));
+	ret = __repmgr_init_election(env, flags);
+
+	UNLOCK_MUTEX(db_rep->mutex);
+	return (ret);
+}
+/*
+ * If a site is rejoining a preferred master replication group and has a
+ * rejection because it needs to catch up with the latest group membership
+ * database, it needs to establish its "regular" connection to the other site
+ * so that it can proceed through the preferred master startup sequence.
+ */
+static int
+rejoin_connections(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	int eid, ret;
+
+	db_rep = env->rep_handle;
+	ret = 0;
+	LOCK_MUTEX(db_rep->mutex);
+
+	/*
+	 * Retry all connections, stopping at (and returning) the first
+	 * scheduling error.  Normally there is only one other site, but the
+	 * group membership may have changed while we were gone.
+	 */
+	FOR_EACH_REMOTE_SITE_INDEX(eid) {
+		if ((ret =
+		    __repmgr_schedule_connection_attempt(env, eid, TRUE)) != 0)
+			break;
+	}
+
+	UNLOCK_MUTEX(db_rep->mutex);
+	return (ret);
+}
diff --git a/src/repmgr/repmgr_net.c b/src/repmgr/repmgr_net.c
index 54e3d066..334fd150 100644
--- a/src/repmgr/repmgr_net.c
+++ b/src/repmgr/repmgr_net.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -57,6 +57,7 @@ struct sending_msg {
  * whether the PERM message should be considered durable.
  */
 struct repmgr_permanence {
+	u_int32_t gen;		/* Master generation for LSN. */
 	DB_LSN lsn;		/* LSN whose ack this thread is waiting for. */
 	u_int threshold;	/* Number of client acks to wait for. */
 	u_int quorum;		/* Durability threshold for QUORUM policy. */
@@ -378,7 +379,7 @@ __repmgr_send(dbenv, control, rec, lsnp, eid, flags)
 			goto out;
 #undef	SEND_ONE_CONNECTION
 
-		nsites_sent = 1;
+		nsites_sent = FLD_ISSET(site->gmdb_flags, SITE_VIEW) ? 0 : 1;
 		npeers_sent = F_ISSET(site, SITE_ELECTABLE) ? 1 : 0;
 		missed_peer = FALSE;
 	}
@@ -418,7 +419,13 @@ __repmgr_send(dbenv, control, rec, lsnp, eid, flags)
 				nclients = 0;
 			else if ((policy == DB_REPMGR_ACKS_ONE ||
 			    policy == DB_REPMGR_ACKS_ONE_PEER) &&
-			    nclients == 1) {
+			    nclients < 2) {
+				/*
+				 * Adjust to QUORUM when first other
+				 * participant joins (nclients=1) or when there
+				 * are no other participants but a view joins
+				 * (nclients=0) to get enough acks.
+				 */
 				nclients = 0;
 				policy = DB_REPMGR_ACKS_QUORUM;
 			}
@@ -498,9 +505,16 @@ __repmgr_send(dbenv, control, rec, lsnp, eid, flags)
 			if (nclients > 1 ||
 			    FLD_ISSET(db_rep->region->config,
 			    REP_C_2SITE_STRICT) ||
-			    db_rep->active_gmdb_update == gmdb_primary)
+			    db_rep->active_gmdb_update == gmdb_primary) {
 				quorum = nclients / 2;
-			else
+				/*
+				 * An unelectable master can't be part of the
+				 * QUORUM policy quorum.
+				 */
+				if (rep->priority == 0 &&
+				    policy == DB_REPMGR_ACKS_QUORUM)
+					quorum++;
+			} else
 				quorum = nclients;
 
 			if (policy == DB_REPMGR_ACKS_ALL_AVAILABLE) {
@@ -560,6 +574,7 @@ __repmgr_send(dbenv, control, rec, lsnp, eid, flags)
 		/* In ALL_PEERS case, display of "needed" might be confusing. */
 		VPRINT(env, (env, DB_VERB_REPMGR_MISC,
 		    "will await acknowledgement: need %u", needed));
+		perm.gen = rep->gen;
 		perm.lsn = *lsnp;
 		perm.threshold = needed;
 		perm.policy = policy;
@@ -734,8 +749,13 @@ __repmgr_send_broadcast(env, type, control, rec, nsitesp, npeersp, missingp)
 		 * useful to keep letting a removed site see updates so that it
 		 * learns of its own removal, and will know to rejoin at its
 		 * next reboot.
+		 *
+		 * We never count sends to views because views cannot
+		 * contribute to durability, but we always do the sends.
 		 */
-		if (site->membership == SITE_PRESENT)
+		if (FLD_ISSET(site->gmdb_flags, SITE_VIEW))
+			full_member = FALSE;
+		else if (site->membership == SITE_PRESENT)
 			full_member = TRUE;
 		else {
 			full_member = FALSE;
@@ -802,7 +822,9 @@ send_connection(env, type, conn, msg, sent)
 		REPMGR_MAX_V1_MSG_TYPE,
 		REPMGR_MAX_V2_MSG_TYPE,
 		REPMGR_MAX_V3_MSG_TYPE,
-		REPMGR_MAX_V4_MSG_TYPE
+		REPMGR_MAX_V4_MSG_TYPE,
+		REPMGR_MAX_V5_MSG_TYPE,
+		REPMGR_MAX_V6_MSG_TYPE
 	};
 
 	db_rep = env->rep_handle;
@@ -1132,18 +1154,24 @@ got_acks(env, context)
 	has_unacked_peer = FALSE;
 	FOR_EACH_REMOTE_SITE_INDEX(eid) {
 		site = SITE_FROM_EID(eid);
-		if (site->membership != SITE_PRESENT)
+		/*
+		 * Do not count an ack from a view because a view cannot
+		 * contribute to durability.
+		 */
+		if (FLD_ISSET(site->gmdb_flags, SITE_VIEW))
 			continue;
 		if (!F_ISSET(site, SITE_HAS_PRIO)) {
 			/*
-			 * Never connected to this site: since we can't know
-			 * whether it's a peer, assume the worst.
+			 * We have not reconnected to this site since the last
+			 * recovery.  Since we don't yet know whether it's a
+			 * peer, assume the worst.
 			 */
 			has_unacked_peer = TRUE;
 			continue;
 		}
 
-		if (LOG_COMPARE(&site->max_ack, &perm->lsn) >= 0) {
+		if (site->max_ack_gen == perm->gen &&
+		    LOG_COMPARE(&site->max_ack, &perm->lsn) >= 0) {
 			sites_acked++;
 			if (F_ISSET(site, SITE_ELECTABLE))
 				peers_acked++;
@@ -1206,6 +1234,7 @@ __repmgr_bust_connection(env, conn)
 	DB_REP *db_rep;
 	REP *rep;
 	REPMGR_SITE *site;
+	db_timespec now;
 	u_int32_t flags;
 	int ret, eid;
 
@@ -1259,7 +1288,9 @@ __repmgr_bust_connection(env, conn)
 	} else			/* Subordinate connection. */
 		goto out;
 
-	if ((ret = __repmgr_schedule_connection_attempt(env, eid, FALSE)) != 0)
+	/* Defer connection attempt if rejoining 2SITE_STRICT=off repgroup. */
+	if (!db_rep->rejoin_pending &&
+	    (ret = __repmgr_schedule_connection_attempt(env, eid, FALSE)) != 0)
 		goto out;
 
 	/*
@@ -1267,11 +1298,47 @@ __repmgr_bust_connection(env, conn)
 	 * master, assume that the master may have failed, and call for
 	 * an election.  But only do this for the connection to the main
 	 * master process, not a subordinate one.  And only do it if
-	 * we're our site's main process, not a subordinate one.  And
+	 * we're our site's listener process, not a subordinate one.  And
 	 * skip it if the application has configured us not to do
 	 * elections.
 	 */
 	if (!IS_SUBORDINATE(db_rep) && eid == rep->master_id) {
+		if (FLD_ISSET(rep->config, REP_C_AUTOTAKEOVER)) {
+			/*
+			 * When the connection is from master's listener, if
+			 * there is any other connection from a master's
+			 * subordinate process that could take over as
+			 * listener, we delay the election to allow some time
+			 * for a new master listener to start.  At the end of
+			 * the delay, if there is still no master listener,
+			 * call an election.  There is a slight chance that
+			 * we will delay the election to wait for an inactive
+			 * connection which would never become the next main
+			 * connection.
+			 */
+			TAILQ_FOREACH(conn, &site->sub_conns, entries) {
+				if (conn->auto_takeover) {
+					if (!timespecisset(
+					    &db_rep->m_listener_chk)) {
+						__os_gettime(env, &now, 1);
+						TIMESPEC_ADD_DB_TIMEOUT(&now,
+						    db_rep->m_listener_wait);
+						db_rep->m_listener_chk = now;
+					}
+					RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		"Master failure, but delay elections for takeover on master"));
+					return (0);
+				}
+			}
+		}
+
+		/* Defer election if rejoining 2SITE_STRICT=off repgroup. */
+		if (db_rep->rejoin_pending) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "Deferring election after rejoin rejection"));
+			goto out;
+		}
+
 		/*
 		 * Even if we're not doing elections, defer the event
 		 * notification to later execution in the election
@@ -1285,6 +1352,17 @@ __repmgr_bust_connection(env, conn)
 			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 			    "Master failure, but no elections"));
 
+		/*
+		 * In preferred master mode, a client that has lost its
+		 * connection to the master uses an election thread to
+		 * restart as master.
+		 */
+		if (IS_PREFMAS_MODE(env)) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+"bust_connection setting preferred master temp master"));
+			db_rep->prefmas_pending = start_temp_master;
+		}
+
 		if ((ret = __repmgr_init_election(env, flags)) != 0)
 			goto out;
 	}
@@ -1340,25 +1418,59 @@ __repmgr_disable_connection(env, conn)
 	REPMGR_CONNECTION *conn;
 {
 	DB_REP *db_rep;
-	REPMGR_SITE *site;
+	REP *rep;
 	REPMGR_RESPONSE *resp;
+	REPMGR_SITE *site;
+	SITEINFO *sites;
 	u_int32_t i;
-	int eid, ret, t_ret;
+	int eid, is_subord, orig_state, ret, t_ret;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 	ret = 0;
+	is_subord = 0;
 
+	orig_state = conn->state;
 	conn->state = CONN_DEFUNCT;
 	if (conn->type == REP_CONNECTION) {
 		eid = conn->eid;
 		if (IS_VALID_EID(eid)) {
 			site = SITE_FROM_EID(eid);
 			if (conn != site->ref.conn.in &&
-			    conn != site->ref.conn.out)
-				/* It's a subordinate connection. */
+			    conn != site->ref.conn.out) {
+				/*
+				 * It is a subordinate connection to disable.
+				 * Remove it from the subordinate connection
+				 * list, and decrease the number of listener
+				 * candidates by 1 if it is from a subordinate
+				 * rep-aware process that allows takeover.
+				 */
 				TAILQ_REMOVE(&site->sub_conns, conn, entries);
+				SET_LISTENER_CAND(conn->auto_takeover, --);
+				is_subord = 1;
+			}
 			TAILQ_INSERT_TAIL(&db_rep->connections, conn, entries);
 			conn->ref_count++;
+			/*
+			 * Do not decrease sites_avail for a subordinate
+			 * connection.
+			 */
+			if (site->state == SITE_CONNECTED && !is_subord &&
+			    (orig_state == CONN_READY ||
+			    orig_state == CONN_CONGESTED)) {
+				/*
+				 * Some thread orderings can cause a brief
+				 * dip into a negative sites_avail value.
+				 * Once it goes negative it stays negative,
+				 * so avoid this.  Future connections will
+				 * be counted correctly.
+				 */
+				if (rep->sites_avail > 0)
+					rep->sites_avail--;
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "disable_conn: EID %lu disabled.  sites_avail %lu",
+				    (u_long)eid, (u_long)rep->sites_avail));
+			}
 		}
 		conn->eid = -1;
 	} else if (conn->type == APP_CONNECTION) {
@@ -1646,8 +1758,10 @@ flatten(env, msg)
 }
 
 /*
- * Scan the list of remote sites, returning the first one that is a peer,
- * is not the current master, and is available.
+ * Scan the list of remote sites, returning the first participant that is a
+ * peer, is not the current master, and is available.  If there are no
+ * available participant peers but there is an available view peer, return the
+ * first available view peer.
  */
 static REPMGR_SITE *
 __repmgr_find_available_peer(env)
@@ -1656,23 +1770,28 @@ __repmgr_find_available_peer(env)
 	DB_REP *db_rep;
 	REP *rep;
 	REPMGR_CONNECTION *conn;
-	REPMGR_SITE *site;
-	u_int i;
+	REPMGR_SITE *site, *view;
+	u_int avail, i;
 
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
+	view = NULL;
 	FOR_EACH_REMOTE_SITE_INDEX(i) {
 		site = &db_rep->sites[i];
-		if (FLD_ISSET(site->config, DB_REPMGR_PEER) &&
-		    EID_FROM_SITE(site) != rep->master_id &&
-		    site->state == SITE_CONNECTED &&
+		avail = (site->state == SITE_CONNECTED &&
 		    (((conn = site->ref.conn.in) != NULL &&
 		    conn->state == CONN_READY) ||
 		    ((conn = site->ref.conn.out) != NULL &&
-		    conn->state == CONN_READY)))
+		    conn->state == CONN_READY)));
+		if (FLD_ISSET(site->config, DB_REPMGR_PEER) &&
+		    !FLD_ISSET(site->gmdb_flags, SITE_VIEW) &&
+		    EID_FROM_SITE(site) != rep->master_id && avail)
 			return (site);
+		if (!view && FLD_ISSET(site->config, DB_REPMGR_PEER) &&
+		    FLD_ISSET(site->gmdb_flags, SITE_VIEW) && avail)
+			view = site;
 	}
-	return (NULL);
+	return (view);
 }
 
 /*
@@ -1852,6 +1971,7 @@ __repmgr_net_close(env)
 			site->ref.conn.out = NULL;
 		}
 	}
+	rep->sites_avail = 0;
 
 	if (db_rep->listen_fd != INVALID_SOCKET) {
 		if (closesocket(db_rep->listen_fd) == SOCKET_ERROR && ret == 0)
@@ -1870,22 +1990,28 @@ final_cleanup(env, conn, unused)
 	void *unused;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_SITE *site;
-	int ret, t_ret;
+	SITEINFO *sites;
+	int eid, ret, t_ret;
 
 	COMPQUIET(unused, NULL);
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	eid = conn->eid;
 
 	ret = __repmgr_close_connection(env, conn);
 	/* Remove the connection from whatever list it's on, if any. */
-	if (conn->type == REP_CONNECTION && IS_VALID_EID(conn->eid)) {
-		site = SITE_FROM_EID(conn->eid);
+	if (conn->type == REP_CONNECTION && IS_VALID_EID(eid)) {
+		site = SITE_FROM_EID(eid);
 
 		if (site->state == SITE_CONNECTED &&
 		    (conn == site->ref.conn.in || conn == site->ref.conn.out)) {
 			/* Not on any list, so no need to do anything. */
-		} else
+		} else {
 			TAILQ_REMOVE(&site->sub_conns, conn, entries);
+			SET_LISTENER_CAND(conn->auto_takeover, --);
+		}
 		t_ret = __repmgr_destroy_conn(env, conn);
 
 	} else {
diff --git a/src/repmgr/repmgr_posix.c b/src/repmgr/repmgr_posix.c
index 0687681a..c49017ff 100644
--- a/src/repmgr/repmgr_posix.c
+++ b/src/repmgr/repmgr_posix.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/repmgr/repmgr_queue.c b/src/repmgr/repmgr_queue.c
index 6a381acf..3a51b32b 100644
--- a/src/repmgr/repmgr_queue.c
+++ b/src/repmgr/repmgr_queue.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -22,13 +22,28 @@ __repmgr_queue_destroy(env)
 	ENV *env;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_MESSAGE *m;
 	REPMGR_CONNECTION *conn;
+	u_int32_t mtype;
 	int ret, t_ret;
 
+	COMPQUIET(mtype, 0);
+
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	ret = 0;
+
+	/*
+	 * Turn on the DB_EVENT_REP_INQUEUE_FULL event firing.  We only do
+	 * this for the main listener process.  For a subordinate process,
+	 * it is always turned on.
+	 */
+	if (!STAILQ_EMPTY(&db_rep->input_queue.header) &&
+	    !IS_SUBORDINATE(db_rep))
+		rep->inqueue_full_event_on = 1;
+
 	while (!STAILQ_EMPTY(&db_rep->input_queue.header)) {
 		m = STAILQ_FIRST(&db_rep->input_queue.header);
 		STAILQ_REMOVE_HEAD(&db_rep->input_queue.header, entries);
@@ -38,8 +53,25 @@ __repmgr_queue_destroy(env)
 			    ret == 0)
 				ret = t_ret;
 		}
+		if (m->msg_hdr.type == REPMGR_OWN_MSG) {
+			mtype = REPMGR_OWN_MSG_TYPE(m->msg_hdr);
+			if ((conn = m->v.gmdb_msg.conn) != NULL) {
+				/*
+				 * A site that removed itself may have already
+				 * closed its connections.
+				 */
+				if ((t_ret = __repmgr_close_connection(env,
+				    conn)) != 0 && ret == 0 &&
+				    mtype != REPMGR_REMOVE_REQUEST)
+					ret = t_ret;
+				if ((t_ret = __repmgr_decr_conn_ref(env,
+				    conn)) != 0 && ret == 0)
+					ret = t_ret;
+			}
+		}
 		__os_free(env, m);
 	}
+
 	return (ret);
 }
 
@@ -60,14 +92,17 @@ __repmgr_queue_get(env, msgp, th)
 	REPMGR_RUNNABLE *th;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_MESSAGE *m;
 #ifdef DB_WIN32
 	HANDLE wait_events[2];
 #endif
+	u_int32_t msgsize;
 	int ret;
 
 	ret = 0;
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	while ((m = available_work(env)) == NULL &&
 	    db_rep->repmgr_status == running && !th->quit_requested) {
@@ -104,10 +139,42 @@ __repmgr_queue_get(env, msgp, th)
 	else {
 		STAILQ_REMOVE(&db_rep->input_queue.header,
 		    m, __repmgr_message, entries);
-		db_rep->input_queue.size--;
+		msgsize = (u_int32_t)m->size;
+		while (msgsize >= GIGABYTE) {
+			DB_ASSERT(env, db_rep->input_queue.gbytes > 0);
+			db_rep->input_queue.gbytes--;
+			msgsize -= GIGABYTE;
+		}
+		if (db_rep->input_queue.bytes < msgsize) {
+			DB_ASSERT(env, db_rep->input_queue.gbytes > 0);
+			db_rep->input_queue.gbytes--;
+			db_rep->input_queue.bytes += GIGABYTE;
+		}
+		db_rep->input_queue.bytes -= msgsize;
+
+		/*
+		 * Check if current size is out of the red zone.
+		 * If it is, we will turn on the DB_EVENT_REP_INQUEUE_FULL
+		 * event firing.
+		 *
+		 * We only have the redzone mechanism for the main listener
+		 * process.
+		 */
+		if (!IS_SUBORDINATE(db_rep) &&
+		    rep->inqueue_full_event_on == 0) {
+			MUTEX_LOCK(env, rep->mtx_repmgr);
+			if (db_rep->input_queue.gbytes <
+			    rep->inqueue_rz_gbytes ||
+			    (db_rep->input_queue.gbytes ==
+			    rep->inqueue_rz_gbytes &&
+			    db_rep->input_queue.bytes <
+			    rep->inqueue_rz_bytes))
+				rep->inqueue_full_event_on = 1;
+			MUTEX_UNLOCK(env, rep->mtx_repmgr);
+		}
+
 		*msgp = m;
 	}
-
 err:
 	return (ret);
 }
@@ -157,24 +224,55 @@ __repmgr_queue_put(env, msg)
 	REPMGR_MESSAGE *msg;
 {
 	DB_REP *db_rep;
+	REP *rep;
+	u_int32_t msgsize;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	/*
+	 * Drop message if incoming queue contains more messages than the
+	 * limit.  See dbenv->repmgr_set_incoming_queue_max() for more
+	 * information.
+	 */
+	MUTEX_LOCK(env, rep->mtx_repmgr);
+	if (db_rep->input_queue.gbytes > rep->inqueue_max_gbytes ||
+	    (db_rep->input_queue.gbytes == rep->inqueue_max_gbytes &&
+	    db_rep->input_queue.bytes >= rep->inqueue_max_bytes)) {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "incoming queue limit exceeded"));
+		STAT(rep->mstat.st_incoming_msgs_dropped++);
+		if (IS_SUBORDINATE(db_rep) || rep->inqueue_full_event_on) {
+			DB_EVENT(env, DB_EVENT_REP_INQUEUE_FULL, NULL);
+			/*
+			 * We will always disable the event firing after
+			 * the queue is full.  It will be enabled again
+			 * after the incoming queue size is out of the
+			 * redzone.
+			 *
+			 * We only have the redzone mechanism for the main
+			 * listener process.
+			 */
+			if (!IS_SUBORDINATE(db_rep))
+				rep->inqueue_full_event_on = 0;
+		}
+		MUTEX_UNLOCK(env, rep->mtx_repmgr);
+		__os_free(env, msg);
+		return (0);
+	}
+	MUTEX_UNLOCK(env, rep->mtx_repmgr);
 
 	STAILQ_INSERT_TAIL(&db_rep->input_queue.header, msg, entries);
-	db_rep->input_queue.size++;
+	msgsize = (u_int32_t)msg->size;
+	while (msgsize >= GIGABYTE) {
+		msgsize -= GIGABYTE;
+		db_rep->input_queue.gbytes++;
+	}
+	db_rep->input_queue.bytes += msgsize;
+	if (db_rep->input_queue.bytes >= GIGABYTE) {
+		db_rep->input_queue.gbytes++;
+		db_rep->input_queue.bytes -= GIGABYTE;
+	}
 
 	return (__repmgr_signal(&db_rep->msg_avail));
 }
-
-/*
- * PUBLIC: int __repmgr_queue_size __P((ENV *));
- *
- * !!!
- * Caller must hold repmgr->mutex.
- */
-int
-__repmgr_queue_size(env)
-	ENV *env;
-{
-	return (env->rep_handle->input_queue.size);
-}
diff --git a/src/repmgr/repmgr_rec.c b/src/repmgr/repmgr_rec.c
index 41827aff..568df45d 100644
--- a/src/repmgr/repmgr_rec.c
+++ b/src/repmgr/repmgr_rec.c
@@ -1,3 +1,11 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2014, 2015 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
 #include "db_config.h"
 
 #include "db_int.h"
@@ -31,7 +39,7 @@ __repmgr_member_recover(env, dbtp, lsnp, op, info)
 
 	/*
 	 * The annotation log record describes the update in enough detail for
-	 * us to be able to optimize our tracking of it at clients sites.
+	 * us to be able to optimize our tracking of it at client sites.
 	 * However, for now we just simply reread the whole (small) database
 	 * each time, since changes happen so seldom (and we need to have the
 	 * code for reading the whole thing anyway, for other cases).
diff --git a/src/repmgr/repmgr_sel.c b/src/repmgr/repmgr_sel.c
index ba14368f..c32dad25 100644
--- a/src/repmgr/repmgr_sel.c
+++ b/src/repmgr/repmgr_sel.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2006, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2006, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -12,7 +12,7 @@
 
 typedef int (*HEARTBEAT_ACTION) __P((ENV *));
 
-static int accept_handshake __P((ENV *, REPMGR_CONNECTION *, char *));
+static int accept_handshake __P((ENV *, REPMGR_CONNECTION *, char *, int *));
 static int accept_v1_handshake __P((ENV *, REPMGR_CONNECTION *, char *));
 static void check_min_log_file __P((ENV *));
 static int dispatch_msgin __P((ENV *, REPMGR_CONNECTION *));
@@ -23,13 +23,18 @@ static int process_parameters __P((ENV *,
 static int read_version_response __P((ENV *, REPMGR_CONNECTION *));
 static int record_permlsn __P((ENV *, REPMGR_CONNECTION *));
 static int __repmgr_call_election __P((ENV *));
+static int __repmgr_check_listener __P((ENV *));
+static int __repmgr_check_master_listener __P((ENV *));
 static int __repmgr_connector_main __P((ENV *, REPMGR_RUNNABLE *));
 static void *__repmgr_connector_thread __P((void *));
 static int __repmgr_next_timeout __P((ENV *,
     db_timespec *, HEARTBEAT_ACTION *));
+static int __repmgr_reset_last_rcvd __P((ENV *));
 static int __repmgr_retry_connections __P((ENV *));
 static int __repmgr_send_heartbeat __P((ENV *));
-static int __repmgr_try_one __P((ENV *, int));
+static int __repmgr_start_takeover __P((ENV *));
+static void *__repmgr_takeover_thread __P((void *));
+static int __repmgr_try_one __P((ENV *, int, int));
 static int resolve_collision __P((ENV *, REPMGR_SITE *, REPMGR_CONNECTION *));
 static int send_version_response __P((ENV *, REPMGR_CONNECTION *));
 
@@ -49,17 +54,24 @@ void *
 __repmgr_select_thread(argsp)
 	void *argsp;
 {
-	REPMGR_RUNNABLE *args;
 	ENV *env;
+	DB_THREAD_INFO *ip;
 	int ret;
+	REPMGR_RUNNABLE *args;
 
 	args = argsp;
 	env = args->env;
+	ip = NULL;
+	ret = 0;
 
-	if ((ret = __repmgr_select_loop(env))  != 0) {
+	ENV_ENTER_RET(env, ip, ret);
+	if (ret != 0 || (ret = __repmgr_select_loop(env)) != 0) {
 		__db_err(env, ret, DB_STR("3614", "select loop failed"));
+		ENV_LEAVE(env, ip);
 		(void)__repmgr_thread_failure(env, ret);
 	}
+	if (ret == 0)
+		ENV_LEAVE(env, ip);
 	return (NULL);
 }
 
@@ -71,12 +83,19 @@ __repmgr_bow_out(env)
 	ENV *env;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	int ret;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 	LOCK_MUTEX(db_rep->mutex);
 	ret = __repmgr_stop_threads(env);
 	UNLOCK_MUTEX(db_rep->mutex);
+	/*
+	 * Reset sites_avail so that it will be calculated correctly if this
+	 * site rejoins the group in the future.
+	 */
+	rep->sites_avail = 0;
 	DB_EVENT(env, DB_EVENT_REP_LOCAL_SITE_REMOVED, NULL);
 	return (ret);
 }
@@ -187,23 +206,53 @@ __repmgr_compute_timeout(env, timeout)
 	db_rep = env->rep_handle;
 
 	/*
-	 * There are two factors to consider: are heartbeats in use?  and, do we
+	 * There are four factors to consider: are heartbeats in use? do we
 	 * have any sites with broken connections that we ought to retry?
+	 * is there a listener process running locally? do we need to call
+	 * an election if no master listener exists?
 	 */
 	have_timeout = __repmgr_next_timeout(env, &t, NULL);
 
 	/* List items are in order, so we only have to examine the first one. */
 	if (!TAILQ_EMPTY(&db_rep->retries)) {
 		retry = TAILQ_FIRST(&db_rep->retries);
-		if (have_timeout) {
+		if (have_timeout)
 			/* Choose earliest timeout deadline. */
 			t = timespeccmp(&retry->time, &t, <) ? retry->time : t;
-		} else {
+		else {
 			t = retry->time;
 			have_timeout = TRUE;
 		}
 	}
 
+	/* Check listener every timeout in subordinate rep-aware process. */
+	if (IS_LISTENER_CAND(db_rep)) {
+		if (!timespecisset(&db_rep->l_listener_chk)) {
+			__os_gettime(env, &now, 1);
+			TIMESPEC_ADD_DB_TIMEOUT(&now, db_rep->l_listener_wait);
+			db_rep->l_listener_chk = now;
+		}
+		if (have_timeout)
+			t = timespeccmp(&db_rep->l_listener_chk, &t, <) ?
+			    db_rep->l_listener_chk : t;
+		else {
+			t = db_rep->l_listener_chk;
+			have_timeout = TRUE;
+		}
+	}
+
+	/* Check master listener if needed. */
+	if (FLD_ISSET(db_rep->region->config, REP_C_AUTOTAKEOVER) &&
+	    timespecisset(&db_rep->m_listener_chk)) {
+		if (have_timeout)
+			t = timespeccmp(&db_rep->m_listener_chk, &t, <) ?
+			    db_rep->m_listener_chk : t;
+		else {
+			t = db_rep->m_listener_chk;
+			have_timeout = TRUE;
+		}
+	}
+
 	if (have_timeout) {
 		__os_gettime(env, &now, 1);
 		if (timespeccmp(&now, &t, >=))
@@ -242,7 +291,17 @@ __repmgr_next_timeout(env, deadline, action)
 
 	if (rep->master_id == db_rep->self_eid &&
 	    rep->heartbeat_frequency > 0) {
-		t = db_rep->last_bcast;
+		/*
+		 * A temporary master in preferred master mode must send
+		 * regular heartbeats regardless of other activity because
+		 * the preferred master requires a heartbeat to take over as
+		 * master after it has synced with the temporary master.
+		 */
+		if (IS_PREFMAS_MODE(env) &&
+		    FLD_ISSET(rep->config, REP_C_PREFMAS_CLIENT))
+			t = db_rep->last_hbeat;
+		else
+			t = db_rep->last_bcast;
 		TIMESPEC_ADD_DB_TIMEOUT(&t, rep->heartbeat_frequency);
 		my_action = __repmgr_send_heartbeat;
 	} else if ((master = __repmgr_connected_master(env)) != NULL &&
@@ -301,6 +360,24 @@ __repmgr_send_heartbeat(env)
 
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
+	ret = 0;
+
+	/*
+	 * Check test hook preventing heartbeats and connection attempts.
+	 * This is used to create and maintain a dupmaster condition in
+	 * a test until the test hook is rescinded.
+	 */
+	DB_TEST_SET(env->test_abort, DB_TEST_REPMGR_HEARTBEAT);
+
+	/*
+	 * Track last heartbeat for temporary master in preferred master
+	 * mode so that it will send regular heartbeats regardless of
+	 * other activity.
+	 */
+	if (IS_PREFMAS_MODE(env) &&
+	    FLD_ISSET(rep->config, REP_C_PREFMAS_CLIENT) &&
+	    rep->master_id == db_rep->self_eid)
+		__os_gettime(env, &db_rep->last_hbeat, 1);
 
 	permlsn.generation = rep->gen;
 	if ((ret = __rep_get_maxpermlsn(env, &permlsn.lsn)) != 0)
@@ -310,8 +387,11 @@ __repmgr_send_heartbeat(env)
 	control.size = __REPMGR_PERMLSN_SIZE;
 
 	DB_INIT_DBT(rec, NULL, 0);
-	return (__repmgr_send_broadcast(env,
-	    REPMGR_HEARTBEAT, &control, &rec, &unused1, &unused2, &unused3));
+	ret =__repmgr_send_broadcast(env,
+	    REPMGR_HEARTBEAT, &control, &rec, &unused1, &unused2, &unused3);
+
+DB_TEST_RECOVERY_LABEL
+	return (ret);
 }
 
 /*
@@ -373,6 +453,8 @@ __repmgr_check_timeouts(env)
 	HEARTBEAT_ACTION action;
 	int ret;
 
+	ret = 0;
+
 	/*
 	 * Figure out the next heartbeat-related thing to be done.  Then, if
 	 * it's time to do it, do so.
@@ -384,7 +466,342 @@ __repmgr_check_timeouts(env)
 			return (ret);
 	}
 
-	return (__repmgr_retry_connections(env));
+	/* Check the existence of local listener. */
+	if ((ret = __repmgr_check_listener(env)) != 0)
+		return (ret);
+
+	/* Check the existence of master listener. */
+	if ((ret = __repmgr_check_master_listener(env)) != 0)
+		return (ret);
+
+	/*
+	 * Check test hook preventing heartbeats and connection attempts.
+	 * This is used to create and maintain a dupmaster condition in
+	 * a test until the test hook is rescinded.
+	 */
+	DB_TEST_SET(env->test_abort, DB_TEST_REPMGR_HEARTBEAT);
+
+	ret = __repmgr_retry_connections(env);
+
+DB_TEST_RECOVERY_LABEL
+	return (ret);
+}
+
+/*
+ * Check for a listener process on the local site.  If there is none and the
+ * current process is a subordinate rep-aware process, start a takeover thread
+ * to convert this process into the listener.  Returns 0 or an error code.
+ */
+static int
+__repmgr_check_listener(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	SITEINFO *sites;
+	db_timespec t;
+	int ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	ret = 0;
+
+	/*
+	 * Only subordinate rep-aware process can take over listener role, so
+	 * no need to check listener in listener process or rep unaware process.
+	 */
+	if (!IS_LISTENER_CAND(db_rep))
+		return (0);
+
+	/*
+	 * If the listener quits due to site removal, no subordinate process
+	 * should take over as listener as the current site is not expected
+	 * to be active in the group.  Check the status from the site array
+	 * in the shared region instead of that in the GMDB.  We do this
+	 * because the GMDB doesn't apply the change yet when replication
+	 * is stopped on the removed site.
+	 */
+	sites = R_ADDR(env->reginfo, rep->siteinfo_off);
+	if (sites[rep->self_eid].status == SITE_DELETING)
+		return (0);
+
+	/*
+	 * Check the listener after timeout.  If there is no listener, we
+	 * take over.  During takeover, we will refresh all connections.
+	 * A subordinate process does not have an up-to-date site list, so sync
+	 * up addresses from the in-memory site array before takeover.
+	 */
+	__os_gettime(env, &t, 1);
+	if (timespeccmp(&t, &db_rep->l_listener_chk, >=)) {
+		/* Compute the next timeout. */
+		TIMESPEC_ADD_DB_TIMEOUT(&t, db_rep->l_listener_wait);
+		db_rep->l_listener_chk = t;
+
+		/* Check if site address information needs to be refreshed. */
+		if ((rep->siteinfo_seq > db_rep->siteinfo_seq) &&
+		    (ret = __repmgr_sync_siteaddr(env)) != 0)
+			return (ret);
+
+		if (rep->listener == 0)
+			ret = __repmgr_start_takeover(env);
+	}
+	return (ret);
+}
+
+/*
+ * Start a thread to take over the listener role in this subordinate process,
+ * reusing a finished runnable; returns 0 if a takeover is still running.
+ */
+static int
+__repmgr_start_takeover(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REPMGR_RUNNABLE *th;
+	int ret;
+
+	db_rep = env->rep_handle;
+	th = db_rep->takeover_thread;
+	if (th == NULL) {
+		if ((ret = __os_calloc(env, 1, sizeof(REPMGR_RUNNABLE),
+		    &th)) != 0)
+			return (ret);
+		db_rep->takeover_thread = th;
+	} else if (th->finished) {
+		if ((ret = __repmgr_thread_join(th)) != 0)
+			return (ret);
+	} else {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "takeover thread still running"));
+		return (0);
+	}
+	th->run = __repmgr_takeover_thread;
+	if ((ret = __repmgr_thread_start(env, th)) != 0) {
+		__os_free(env, th);
+		db_rep->takeover_thread = NULL;
+	}
+	return (ret);
+}
+
+/*
+ * Thread body that takes over the listener role in this subordinate process.
+ */
+static void *
+__repmgr_takeover_thread(argsp)
+	void *argsp;
+{
+	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	REP *rep;
+	REPMGR_RUNNABLE *th;
+	int nthreads, ret, save_policy;
+
+	th = argsp;
+	env = th->env;
+	db_rep = env->rep_handle;
+	ip = NULL;
+	rep = db_rep->region;
+	ret = 0;
+
+	ENV_ENTER_RET(env, ip, ret);
+	if (ret != 0)
+		goto out;
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "starting takeover thread"));
+	/*
+	 * Reset the heartbeat expiration countdown so that a stale heartbeat
+	 * does not trigger an unnecessary election when repmgr restarts.  On
+	 * failure go to "leave" so ENV_LEAVE pairs with ENV_ENTER_RET above.
+	 */
+	if ((ret = __repmgr_reset_last_rcvd(env)) != 0)
+		goto leave;
+	/*
+	 * If nthreads is set to be 0 in the current subordinate process, use
+	 * the value in the last listener. The nthreads should be larger than
+	 * 0 in listener.
+	 */
+	nthreads = db_rep->config_nthreads == 0 ? (int)rep->listener_nthreads :
+	    db_rep->config_nthreads;
+	/*
+	 * It is possible that this subordinate process does not have intact
+	 * connections to the other sites.  For most ack policies, restarting
+	 * repmgr will wait for acks when it commits its transaction to reload
+	 * the gmdb.  Temporarily set the ack policy to NONE for the takeover
+	 * so that it is not delayed waiting for acks that can never come.
+	 */
+	save_policy = rep->perm_policy;
+	rep->perm_policy = DB_REPMGR_ACKS_NONE;
+	/*
+	 * Restart the repmgr as listener.  If DB_REP_IGNORE is returned,
+	 * the current process has become listener.  If DB_REP_UNAVAIL is
+	 * returned, the site has been removed from the group and no listener
+	 * should be started.  For any other error, if the replication is
+	 * stopped because of the takeover thread, we will notify the
+	 * application.
+	 */
+	ret = __repmgr_start_int(env, nthreads, F_ISSET(rep, REP_F_MASTER) ?
+	    DB_REP_MASTER : DB_REP_CLIENT);
+	if (ret == 0 && !IS_SUBORDINATE(db_rep) &&
+	    db_rep->repmgr_status == running) {
+		STAT(rep->mstat.st_takeovers++);
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "finished takeover and became listener"));
+	} else if (ret != 0 && db_rep->repmgr_status == stopped) {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "failed to take over, repmgr was stopped"));
+		DB_EVENT(env, DB_EVENT_REP_AUTOTAKEOVER_FAILED, NULL);
+	} else {
+		/* The current process is not changed to listener. */
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC, "failed to take over"));
+	}
+	rep->perm_policy = save_policy;
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "takeover thread is exiting"));
+leave:	ENV_LEAVE(env, ip);
+out:	th->finished = TRUE;
+	return (NULL);
+}
+
+/*
+ * Restart the heartbeat-expiration wait by refreshing the connected master's
+ * last_rcvd_timestamp via __os_gettime(); no-op if no master is connected.
+ */
+static int
+__repmgr_reset_last_rcvd(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REPMGR_SITE *master;
+
+	db_rep = env->rep_handle;
+
+	LOCK_MUTEX(db_rep->mutex);
+	if ((master = __repmgr_connected_master(env)) != NULL)
+		__os_gettime(env, &master->last_rcvd_timestamp, 1);
+	UNLOCK_MUTEX(db_rep->mutex);
+	return (0);
+}
+
+/*
+ * Monitor the connection to the master listener.  When it is disconnected and
+ * another master process might take over as listener soon, the election is
+ * delayed.  Once the delay expires, call an election if there is still no
+ * connection to the master; returns 0 or the __repmgr_init_election() result.
+ */
+static int
+__repmgr_check_master_listener(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	REPMGR_SITE *master;
+	db_timespec t;
+	u_int32_t flags;
+	int ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	ret = 0;
+
+	/*
+	 * We only check for a master listener if m_listener_chk is set.
+	 * The field is only set when __repmgr_bust_connection() previously
+	 * detected the loss of our connection to the master listener.
+	 * If rep->master_id is invalid, wait until it is ready to check.
+	 */
+	if (!FLD_ISSET(rep->config, REP_C_AUTOTAKEOVER) ||
+	    !timespecisset(&db_rep->m_listener_chk) ||
+	    !IS_VALID_EID(rep->master_id))
+		return (0);
+
+	__os_gettime(env, &t, 1);
+	if (timespeccmp(&t, &db_rep->m_listener_chk, >=)) {
+		master = SITE_FROM_EID(rep->master_id);
+		if (master->ref.conn.out == NULL &&
+		    master->ref.conn.in == NULL) {
+			flags = ELECT_F_EVENT_NOTIFY;
+			if (FLD_ISSET(rep->config, REP_C_ELECTIONS))
+				LF_SET(ELECT_F_IMMED | ELECT_F_FAST);
+			else
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+				    "Master failure, but no elections"));
+
+			/*
+			 * In preferred master mode, a client that has lost its
+			 * connection to the master uses an election thread to
+			 * restart as master.
+			 */
+			if (IS_PREFMAS_MODE(env)) {
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+"check_master_listener setting preferred master temp master"));
+				db_rep->prefmas_pending = start_temp_master;
+			}
+
+			ret = __repmgr_init_election(env, flags);
+		}
+		/*
+		 * If the delay has expired reset m_listener_chk.  We reset
+		 * it whether or not the master listener process comes back
+		 * so that we will not continue checking for a master listener
+		 * indefinitely.
+		 */
+		timespecclear(&db_rep->m_listener_chk);
+	}
+	return (ret);
+}
+
+/*
+ * Wake up the selector thread, drop the pending retry of any paused site, and
+ * start a refreshing connection attempt to every remote SITE_PRESENT site.
+ *
+ * PUBLIC: int __repmgr_refresh_selector __P((ENV *));
+ */
+int
+__repmgr_refresh_selector(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	REPMGR_RETRY *retry;
+	REPMGR_SITE *site;
+	SITEINFO *sites;
+	int eid, ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	if ((ret = __repmgr_wake_main_thread(env)) != 0)
+		return (ret);
+
+	FOR_EACH_REMOTE_SITE_INDEX(eid) {
+		SET_LISTENER_CAND(1, = 0);
+		site = SITE_FROM_EID(eid);
+
+		/*
+		 * It is possible some sites were left in a paused state
+		 * during the switch, so they have to be removed from the
+		 * retry list.
+		 */
+		if (site->state == SITE_PAUSING) {
+			retry = site->ref.retry;
+			if (retry != NULL) {
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+				    "Removing site from retry list eid %lu",
+				    (u_long)eid));
+				TAILQ_REMOVE(&db_rep->retries, retry, entries);
+				__os_free(env, retry);
+				site->ref.retry = NULL;
+			}
+
+		}
+		/*
+		 * Try to connect to any site that is now PRESENT after
+		 * rereading the gmdb.
+		 */
+		if (site->membership == SITE_PRESENT &&
+		    (ret = __repmgr_try_one(env, eid, TRUE)) != 0)
+			return (ret);
+	}
+	return (0);
 }
 
 /*
@@ -415,10 +832,11 @@ __repmgr_retry_connections(env)
 		__os_free(env, retry);
 		DB_ASSERT(env, IS_VALID_EID(eid));
 		site = SITE_FROM_EID(eid);
+		site->ref.retry = NULL;
 		DB_ASSERT(env, site->state == SITE_PAUSING);
 
 		if (site->membership == SITE_PRESENT) {
-			if ((ret = __repmgr_try_one(env, eid)) != 0)
+			if ((ret = __repmgr_try_one(env, eid, FALSE)) != 0)
 				return (ret);
 		} else
 			site->state = SITE_IDLE;
@@ -437,11 +855,23 @@ __repmgr_first_try_connections(env)
 	ENV *env;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_SITE *site;
+	SITEINFO *sites;
 	int eid, ret;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
+
+	/*
+	 * Check test hook preventing heartbeats and connection attempts.
+	 * This is used to create and maintain a dupmaster condition in
+	 * a test until the test hook is rescinded.
+	 */
+	DB_TEST_SET(env->test_abort, DB_TEST_REPMGR_HEARTBEAT);
+
 	FOR_EACH_REMOTE_SITE_INDEX(eid) {
+		SET_LISTENER_CAND(1, = 0);
 		site = SITE_FROM_EID(eid);
 		/*
 		 * Normally all sites would be IDLE here.  But if a user thread
@@ -453,19 +883,22 @@ __repmgr_first_try_connections(env)
 		 */
 		if (site->state == SITE_IDLE &&
 		    site->membership == SITE_PRESENT &&
-		    (ret = __repmgr_try_one(env, eid)) != 0)
+		    (ret = __repmgr_try_one(env, eid, FALSE)) != 0)
 			return (ret);
 	}
+DB_TEST_RECOVERY_LABEL
 	return (0);
 }
 
 /*
- * Starts a thread to open a connection to the site at the given EID.
+ * Starts a thread to open a connection to the site at the given EID.  We might
+ * have no connection to the site, or an existing connection to be replaced.
  */
 static int
-__repmgr_try_one(env, eid)
+__repmgr_try_one(env, eid, refresh)
 	ENV *env;
 	int eid;
+	int refresh;
 {
 	DB_REP *db_rep;
 	REPMGR_SITE *site;
@@ -488,13 +921,22 @@ __repmgr_try_one(env, eid)
 		  "eid %lu previous connector thread still running; will retry",
 		    (u_long)eid));
 		return (__repmgr_schedule_connection_attempt(env,
-			eid, FALSE));
+			eid, refresh));
 	}
 
 	site->state = SITE_CONNECTING;
 
 	th->run = __repmgr_connector_thread;
-	th->args.eid = eid;
+	th->args.conn_th.eid = eid;
+	/*
+	 * The flag CONNECT_F_REFRESH indicates an immediate connection attempt
+	 * should be scheduled if the current connection attempt fails.  It is
+	 * turned on before the first attempt to refresh the connection but
+	 * turned off if the first attempt fails.  In this way, when refreshing
+	 * the connection, there will be at most two immediate connection
+	 * attempts; after that, we retry as usual.
+	 */
+	th->args.conn_th.flags = refresh ? CONNECT_F_REFRESH : 0;
 	if ((ret = __repmgr_thread_start(env, th)) != 0) {
 		__os_free(env, th);
 		site->connector = NULL;
@@ -506,21 +948,33 @@ static void *
 __repmgr_connector_thread(argsp)
 	void *argsp;
 {
-	REPMGR_RUNNABLE *th;
 	ENV *env;
+	DB_THREAD_INFO *ip;
+	REPMGR_RUNNABLE *th;
 	int ret;
 
 	th = argsp;
 	env = th->env;
+	ip = NULL;
+	ret = 0;
 
-	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
-	    "starting connector thread, eid %u", th->args.eid));
-	if ((ret = __repmgr_connector_main(env, th)) != 0) {
+	ENV_ENTER_RET(env, ip, ret);
+	if (ret == 0)
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "starting connector thread, eid %u",
+		    th->args.conn_th.eid));
+	if (ret != 0 || (ret = __repmgr_connector_main(env, th)) != 0) {
 		__db_err(env, ret, DB_STR("3617", "connector thread failed"));
+		RPRINT(env, (env,
+		    DB_VERB_REPMGR_MISC, "connector thread is exiting"));
+		ENV_LEAVE(env, ip);
 		(void)__repmgr_thread_failure(env, ret);
 	}
-	RPRINT(env, (env, DB_VERB_REPMGR_MISC, "connector thread is exiting"));
-
+	if (ret == 0) {
+		RPRINT(env, (env,
+		    DB_VERB_REPMGR_MISC, "connector thread is exiting"));
+		ENV_LEAVE(env, ip);
+	}
 	th->finished = TRUE;
 	return (NULL);
 }
@@ -542,8 +996,8 @@ __repmgr_connector_main(env, th)
 	ret = 0;
 
 	LOCK_MUTEX(db_rep->mutex);
-	DB_ASSERT(env, IS_VALID_EID(th->args.eid));
-	site = SITE_FROM_EID(th->args.eid);
+	DB_ASSERT(env, IS_VALID_EID(th->args.conn_th.eid));
+	site = SITE_FROM_EID(th->args.conn_th.eid);
 	if (site->state != SITE_CONNECTING && db_rep->repmgr_status == stopped)
 		goto unlock;
 
@@ -563,7 +1017,8 @@ __repmgr_connector_main(env, th)
 	UNLOCK_MUTEX(db_rep->mutex);
 
 	if ((ret = __repmgr_connect(env, &netaddr, &conn, &err)) == 0) {
-		DB_EVENT(env,  DB_EVENT_REP_CONNECT_ESTD, &th->args.eid);
+		DB_EVENT(env,
+		    DB_EVENT_REP_CONNECT_ESTD, &th->args.conn_th.eid);
 		LOCK_MUTEX(db_rep->mutex);
 		if ((ret = __repmgr_set_nonblock_conn(conn)) != 0) {
 			__db_err(env, ret, DB_STR("3618",
@@ -571,33 +1026,53 @@ __repmgr_connector_main(env, th)
 			goto cleanup;
 		}
 		conn->type = REP_CONNECTION;
-		site = SITE_FROM_EID(th->args.eid);
+		site = SITE_FROM_EID(th->args.conn_th.eid);
 		if (site->state != SITE_CONNECTING ||
 		    db_rep->repmgr_status == stopped)
 			goto cleanup;
 
-		conn->eid = th->args.eid;
-		site = SITE_FROM_EID(th->args.eid);
-		site->ref.conn.out = conn;
+		conn->eid = th->args.conn_th.eid;
+		site = SITE_FROM_EID(th->args.conn_th.eid);
+		/*
+		 * If there is an existing outgoing connection, disable it and
+		 * replace it with a new connection.  The sites for a formerly
+		 * subordinate handle that is now taking over might still be
+		 * SITE_CONNECTING.  Set to SITE_CONNECTED before disabling
+		 * connection so that sites_avail is correctly maintained.
+		 */
 		site->state = SITE_CONNECTED;
+		if (site->ref.conn.out != NULL)
+			(void)__repmgr_disable_connection(env,
+			    site->ref.conn.out);
+		site->ref.conn.out = conn;
 		__os_gettime(env, &site->last_rcvd_timestamp, 1);
 		ret = __repmgr_wake_main_thread(env);
 	} else if (ret == DB_REP_UNAVAIL) {
 		/* Retryable error while trying to connect: retry later. */
-		info.eid = th->args.eid;
+		info.eid = th->args.conn_th.eid;
 		info.error = err;
 		DB_EVENT(env, DB_EVENT_REP_CONNECT_TRY_FAILED, &info);
 		STAT(db_rep->region->mstat.st_connect_fail++);
 
 		LOCK_MUTEX(db_rep->mutex);
-		site = SITE_FROM_EID(th->args.eid);
+		site = SITE_FROM_EID(th->args.conn_th.eid);
 		if (site->state != SITE_CONNECTING ||
 		    db_rep->repmgr_status == stopped) {
 			ret = 0;
 			goto unlock;
 		}
+		/*
+		 * If it fails to create a new outgoing connection to replace
+		 * the existing one in the first attempt, schedule another
+		 * immediate attempt.  If it is our second attempt, disable
+		 * the existing connections and retry as normal.
+		 */
+		if (site->ref.conn.out != NULL && th->args.conn_th.flags == 0)
+			(void)__repmgr_disable_connection(env,
+			    site->ref.conn.out);
 		ret = __repmgr_schedule_connection_attempt(env,
-		    th->args.eid, FALSE);
+		    th->args.conn_th.eid,
+		    th->args.conn_th.flags == CONNECT_F_REFRESH);
 	} else
 		goto out;
 
@@ -842,6 +1317,7 @@ prepare_input(env, conn)
 		if ((ret = __os_malloc(env, memsize, &membase)) != 0)
 			return (ret);
 		conn->input.rep_message = membase;
+		conn->input.rep_message->size = memsize;
 		conn->input.rep_message->msg_hdr = msg_hdr;
 		conn->input.rep_message->v.repmsg.originating_eid = conn->eid;
 
@@ -876,6 +1352,7 @@ prepare_input(env, conn)
 		if ((ret = __os_malloc(env, memsize, &membase)) != 0)
 			return (ret);
 		conn->input.rep_message = membase;
+		conn->input.rep_message->size = memsize;
 		conn->input.rep_message->msg_hdr = msg_hdr;
 		conn->input.rep_message->v.appmsg.conn = conn;
 
@@ -891,6 +1368,7 @@ prepare_input(env, conn)
 		if ((ret = __os_malloc(env, size, &membase)) != 0)
 			return (ret);
 		conn->input.rep_message = membase;
+		conn->input.rep_message->size = size;
 		conn->input.rep_message->msg_hdr = msg_hdr;
 
 		/*
@@ -1065,16 +1543,18 @@ dispatch_msgin(env, conn)
 	ENV *env;
 	REPMGR_CONNECTION *conn;
 {
+	DBT *dbt;
 	DB_REP *db_rep;
-	REPMGR_SITE *site;
-	REPMGR_RUNNABLE *th;
+	REP *rep;
 	REPMGR_RESPONSE *resp;
-	DBT *dbt;
+	REPMGR_RUNNABLE *th;
+	REPMGR_SITE *site;
 	char *hostname;
-	int eid, ret;
+	int eid, ret, subord;
 
 	DB_ASSERT(env, conn->reading_phase == DATA_PHASE);
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	switch (conn->state) {
 	case CONN_CONNECTED:
@@ -1129,9 +1609,22 @@ dispatch_msgin(env, conn)
 			dbt = &conn->input.repmgr_msg.rec;
 			hostname = dbt->data;
 			hostname[dbt->size-1] = '\0';
-			if ((ret = accept_handshake(env, conn, hostname)) != 0)
+			if ((ret = accept_handshake(env,
+			    conn, hostname, &subord)) != 0)
 				return (ret);
 			conn->state = CONN_READY;
+			site = SITE_FROM_EID(conn->eid);
+			/*
+			 * Do not increase sites_avail redundantly for an
+			 * incoming subordinate connection.
+			 */
+			if (conn->type == REP_CONNECTION &&
+			    site->state == SITE_CONNECTED && !subord) {
+				rep->sites_avail++;
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+    "msgin: EID %lu CONNECTED, READY.  sites_avail %lu",
+    (u_long)conn->eid, (u_long)rep->sites_avail));
+			}
 			break;
 		case REPMGR_OWN_MSG:
 			/*
@@ -1279,9 +1772,11 @@ process_own_msg(env, conn)
 	REPMGR_SITE *site;
 	REPMGR_MESSAGE *msg;
 	__repmgr_connect_reject_args reject;
+	__repmgr_v4connect_reject_args v4reject;
 	__repmgr_parm_refresh_args parms;
 	int ret;
 
+	db_rep = env->rep_handle;
 	ret = 0;
 	/*
 	 * Set "msg" to point to the message struct.  If we do all necessary
@@ -1293,28 +1788,61 @@ process_own_msg(env, conn)
 	switch (REPMGR_OWN_MSG_TYPE((msg = conn->input.rep_message)->msg_hdr)) {
 	case REPMGR_CONNECT_REJECT:
 		dbt = &msg->v.gmdb_msg.request;
-		if ((ret = __repmgr_connect_reject_unmarshal(env,
-		    &reject, dbt->data, dbt->size, NULL)) != 0)
-			return (DB_REP_UNAVAIL);
+		if (conn->version < 5) {
+			if ((ret = __repmgr_v4connect_reject_unmarshal(env,
+			    &v4reject, dbt->data, dbt->size, NULL)) != 0)
+				return (DB_REP_UNAVAIL);
+			reject.version = v4reject.version;
+			reject.gen = v4reject.gen;
+			reject.status = 0;
+		} else {
+			if ((ret = __repmgr_connect_reject_unmarshal(env,
+			    &reject, dbt->data, dbt->size, NULL)) != 0)
+				return (DB_REP_UNAVAIL);
+		}
 
 		/*
 		 * If we're being rejected by someone who has more up-to-date
-		 * membership information than we do, it means we have been
-		 * removed from the group.  If we've just gotten started, we can
-		 * make one attempt at automatically rejoining; otherwise we bow
-		 * out gracefully.
+		 * membership information than we do, it means we are not in
+		 * the group.  If we've just gotten started, or our status is
+		 * adding, we can make one attempt at automatically rejoining;
+		 * otherwise we bow out gracefully.
 		 */
 		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
-			"got rejection msg citing version %lu/%lu",
-			(u_long)reject.gen, (u_long)reject.version));
+	"got rejection msg citing version %lu/%lu mine %lu/%lu membership %lu",
+			(u_long)reject.gen, (u_long)reject.version,
+			(u_long)db_rep->member_version_gen,
+			(u_long)db_rep->membership_version,
+			(u_long)reject.status));
 
 		if (__repmgr_gmdb_version_cmp(env,
 		    reject.gen, reject.version) > 0) {
-			if (env->rep_handle->seen_repmsg)
+			if (db_rep->seen_repmsg && reject.status != SITE_ADDING)
 				ret = DB_DELETED;
-			else if ((ret = __repmgr_defer_op(env,
-			    REPMGR_REJOIN)) == 0)
-				ret = DB_REP_UNAVAIL;
+			else {
+				/*
+				 * If 2SITE_STRICT is off, we are likely to
+				 * win an election with our own vote before
+				 * discovering there is already a master.
+				 * Set indicator to defer the election until
+				 * after rejoining group.
+				 *
+				 * In preferred master mode, either site
+				 * should defer the election (which
+				 * executes the preferred master startup
+				 * code and only calls an election if it is
+				 * safe) and also avoid scheduling an extra
+				 * reconnect attempt in bust_connection()
+				 * by setting the indicator.
+				 */
+				if (!FLD_ISSET(db_rep->region->config,
+				    REP_C_2SITE_STRICT) ||
+				    IS_PREFMAS_MODE(env))
+					db_rep->rejoin_pending = TRUE;
+				if ((ret = __repmgr_defer_op(env,
+				    REPMGR_REJOIN)) == 0)
+					ret = DB_REP_UNAVAIL;
+			}
 		} else
 			ret = DB_REP_UNAVAIL;
 		DB_ASSERT(env, ret != 0);
@@ -1332,7 +1860,6 @@ process_own_msg(env, conn)
 		if ((ret = __repmgr_parm_refresh_unmarshal(env,
 		    &parms, dbt->data, dbt->size, NULL)) != 0)
 			return (DB_REP_UNAVAIL);
-		db_rep = env->rep_handle;
 		DB_ASSERT(env, conn->type == REP_CONNECTION &&
 		    IS_KNOWN_REMOTE_SITE(conn->eid));
 		site = SITE_FROM_EID(conn->eid);
@@ -1348,8 +1875,15 @@ process_own_msg(env, conn)
 	case REPMGR_GM_FORWARD:
 	case REPMGR_JOIN_REQUEST:
 	case REPMGR_JOIN_SUCCESS:
+	case REPMGR_LSNHIST_REQUEST:
+	case REPMGR_LSNHIST_RESPONSE:
+	case REPMGR_PREFMAS_FAILURE:
+	case REPMGR_PREFMAS_SUCCESS:
+	case REPMGR_READONLY_MASTER:
+	case REPMGR_READONLY_RESPONSE:
 	case REPMGR_REMOVE_REQUEST:
 	case REPMGR_RESOLVE_LIMBO:
+	case REPMGR_RESTART_CLIENT:
 	default:
 		__db_errx(env, DB_STR_A("3677",
 		    "unexpected msg type %lu in process_own_msg", "%lu"),
@@ -1482,6 +2016,8 @@ __repmgr_send_handshake(env, conn, opt, optlen, flags)
 		cntrl_len = __REPMGR_V3HANDSHAKE_SIZE;
 		break;
 	case 4:
+	case 5:
+	case 6:
 		cntrl_len = __REPMGR_HANDSHAKE_SIZE;
 		break;
 	default:
@@ -1513,6 +2049,8 @@ __repmgr_send_handshake(env, conn, opt, optlen, flags)
 		__repmgr_v3handshake_marshal(env, &v3hs, p);
 		break;
 	case 4:
+	case 5:
+	case 6:
 		hs.port = my_addr->port;
 		hs.alignment = MEM_ALIGN;
 		hs.ack_policy = (u_int32_t)rep->perm_policy;
@@ -1551,11 +2089,14 @@ read_version_response(env, conn)
 	DB_REP *db_rep;
 	__repmgr_version_confirmation_args conf;
 	DBT vi;
+	REP *rep;
+	REPMGR_SITE *site;
 	char *hostname;
 	u_int32_t flags;
-	int ret;
+	int ret, subord;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0)
 		return (ret);
@@ -1581,14 +2122,37 @@ read_version_response(env, conn)
 			return (DB_REP_UNAVAIL);
 		}
 
-		if ((ret = accept_handshake(env, conn, hostname)) != 0)
+		if ((ret = accept_handshake(env, conn, hostname, &subord)) != 0)
 			return (ret);
-		flags = IS_SUBORDINATE(db_rep) ? REPMGR_SUBORDINATE : 0;
+		if (!IS_SUBORDINATE(db_rep))
+			flags = 0;
+		else {
+			flags = REPMGR_SUBORDINATE;
+			if (FLD_ISSET(rep->config, REP_C_AUTOTAKEOVER) &&
+			    db_rep->repmgr_status == running)
+				/*
+				 * Takeover is enabled in rep-aware subordinate
+				 * process.
+				 */
+				flags |= REPMGR_AUTOTAKEOVER;
+		}
 		if ((ret = __repmgr_send_handshake(env,
 		    conn, NULL, 0, flags)) != 0)
 			return (ret);
 	}
 	conn->state = CONN_READY;
+	site = SITE_FROM_EID(conn->eid);
+	/*
+	 * Do not increase sites_avail redundantly for a new outgoing
+	 * connection from a subordinate process.
+	 */
+	if (conn->type == REP_CONNECTION &&
+	    site->state == SITE_CONNECTED && !IS_SUBORDINATE(db_rep)) {
+		rep->sites_avail++;
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "vers_resp: EID %lu CONNECTED, READY.  sites_avail %lu",
+		    (u_long)conn->eid, (u_long)rep->sites_avail));
+	}
 	return (ret);
 }
 
@@ -1641,10 +2205,11 @@ __repmgr_find_version_info(env, conn, vi)
 }
 
 static int
-accept_handshake(env, conn, hostname)
+accept_handshake(env, conn, hostname, subordinate)
 	ENV *env;
 	REPMGR_CONNECTION *conn;
 	char *hostname;
+	int *subordinate;
 {
 	__repmgr_handshake_args hs;
 	__repmgr_v2handshake_args hs2;
@@ -1653,6 +2218,7 @@ accept_handshake(env, conn, hostname)
 	u_int32_t ack, flags;
 	int electable;
 
+	*subordinate = 0;
 	switch (conn->version) {
 	case 2:
 		if (__repmgr_v2handshake_unmarshal(env, &hs2,
@@ -1674,6 +2240,8 @@ accept_handshake(env, conn, hostname)
 		ack = 0;
 		break;
 	case 4:
+	case 5:
+	case 6:
 		if (__repmgr_handshake_unmarshal(env, &hs,
 		   conn->input.repmgr_msg.cntrl.data,
 		   conn->input.repmgr_msg.cntrl.size, NULL) != 0)
@@ -1682,6 +2250,8 @@ accept_handshake(env, conn, hostname)
 		electable = F_ISSET(&hs, ELECTABLE_SITE);
 		flags = hs.flags;
 		ack = hs.ack_policy;
+		if (LF_ISSET(REPMGR_SUBORDINATE))
+			*subordinate = 1;
 		break;
 	default:
 		__db_errx(env, DB_STR_A("3679",
@@ -1729,13 +2299,17 @@ process_parameters(env, conn, host, port, ack, electable, flags)
 	u_int32_t ack, flags;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_RETRY *retry;
 	REPMGR_SITE *site;
+	SITEINFO *sites;
 	__repmgr_connect_reject_args reject;
+	__repmgr_v4connect_reject_args v4reject;
 	u_int8_t reject_buf[__REPMGR_CONNECT_REJECT_SIZE];
 	int eid, ret;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	/* Connection state can be used to discern incoming versus outgoing. */
 	if (conn->state == CONN_CONNECTED) {
@@ -1785,6 +2359,13 @@ process_parameters(env, conn, host, port, ack, electable, flags)
 				TAILQ_INSERT_TAIL(&site->sub_conns,
 				    conn, entries);
 				conn->eid = eid;
+				conn->auto_takeover =
+				    LF_ISSET(REPMGR_AUTOTAKEOVER) ? 1 : 0;
+				SET_LISTENER_CAND(conn->auto_takeover, ++);
+				RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		"handshake from subordinate %sconnection at site %s:%u EID %u",
+				    LF_ISSET(REPMGR_AUTOTAKEOVER)?
+				    "takeover ": "", host, port, eid));
 			} else {
 				DB_EVENT(env,
 				    DB_EVENT_REP_CONNECT_ESTD, &eid);
@@ -1797,6 +2378,7 @@ process_parameters(env, conn, host, port, ack, electable, flags)
 					TAILQ_REMOVE(&db_rep->retries,
 					    retry, entries);
 					__os_free(env, retry);
+					site->ref.retry = NULL;
 					break;
 				case SITE_CONNECTED:
 					/*
@@ -1821,6 +2403,16 @@ process_parameters(env, conn, host, port, ack, electable, flags)
 					 * don't have to do anything else here.
 					 */
 					break;
+				case SITE_IDLE:
+					/*
+					 * This can occur after the heartbeat
+					 * test hook artificially kept this
+					 * site from first trying to connect.
+					 */
+					RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+				      "handshake from idle site %s:%u EID %u",
+					    host, port, eid));
+					break;
 				default:
 					DB_ASSERT(env, FALSE);
 				}
@@ -1834,10 +2426,18 @@ process_parameters(env, conn, host, port, ack, electable, flags)
 			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 		  "rejecting connection from unknown or provisional site %s:%u",
 			    host, port));
-			reject.version = db_rep->membership_version;
-			reject.gen = db_rep->member_version_gen;
-			__repmgr_connect_reject_marshal(env,
-			    &reject, reject_buf);
+			if (conn->version < 5) {
+				v4reject.version = db_rep->membership_version;
+				v4reject.gen = db_rep->member_version_gen;
+				__repmgr_v4connect_reject_marshal(env,
+				    &v4reject, reject_buf);
+			} else {
+				reject.version = db_rep->membership_version;
+				reject.gen = db_rep->member_version_gen;
+				reject.status = (site) ? site->membership : 0;
+				__repmgr_connect_reject_marshal(env,
+				    &reject, reject_buf);
+			}
 
 			if ((ret = __repmgr_send_own_msg(env, conn,
 			    REPMGR_CONNECT_REJECT, reject_buf,
@@ -1867,7 +2467,8 @@ process_parameters(env, conn, host, port, ack, electable, flags)
 	 */
 	if (!IS_SUBORDINATE(db_rep) && /* us */
 	    !__repmgr_master_is_known(env) &&
-	    !LF_ISSET(REPMGR_SUBORDINATE)) { /* the remote site */
+	    !LF_ISSET(REPMGR_SUBORDINATE) && /* the remote site */
+	    !IS_PREFMAS_MODE(env)) {
 		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 		    "handshake with no known master to wake election thread"));
 		db_rep->new_connection = TRUE;
@@ -1980,6 +2581,7 @@ record_permlsn(env, conn)
 		 */
 		if (ackp->lsn.file > site->max_ack.file)
 			do_log_check = 1;
+		site->max_ack_gen = ackp->generation;
 		memcpy(&site->max_ack, &ackp->lsn, sizeof(DB_LSN));
 		if (do_log_check)
 			check_min_log_file(env);
diff --git a/src/repmgr/repmgr_stat.c b/src/repmgr/repmgr_stat.c
index fd6dabd3..215f4719 100644
--- a/src/repmgr/repmgr_stat.c
+++ b/src/repmgr/repmgr_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -55,7 +55,9 @@ __repmgr_stat(env, statp, flags)
 {
 	DB_REP *db_rep;
 	DB_REPMGR_STAT *copy, *stats;
-	uintmax_t tmp;
+	REPMGR_SITE *site;
+	u_int32_t tmp;
+	u_int i;
 	int ret;
 
 	db_rep = env->rep_handle;
@@ -73,6 +75,20 @@ __repmgr_stat(env, statp, flags)
 		memset(stats, 0, sizeof(DB_REPMGR_STAT));
 		stats->st_max_elect_threads = tmp;
 	}
+	stats->st_incoming_queue_gbytes = db_rep->input_queue.gbytes;
+	stats->st_incoming_queue_bytes = db_rep->input_queue.bytes;
+	LOCK_MUTEX(db_rep->mutex);
+	for (i = 0; i < db_rep->site_cnt; i++) {
+		site = SITE_FROM_EID(i);
+		if (site->membership != 0) {
+			copy->st_site_total++;
+			if (FLD_ISSET(site->gmdb_flags, SITE_VIEW))
+				copy->st_site_views++;
+			else
+				copy->st_site_participants++;
+		}
+	}
+	UNLOCK_MUTEX(db_rep->mutex);
 
 	*statp = copy;
 	return (0);
@@ -148,6 +164,11 @@ __repmgr_print_stats(env, flags)
 	    (u_long)sp->st_msgs_queued);
 	__db_dl(env, "Number of messages discarded due to queue length",
 	    (u_long)sp->st_msgs_dropped);
+	__db_dlbytes(env, "Incoming message size in queue",
+	    (u_long)sp->st_incoming_queue_gbytes, (u_long)0,
+	    (u_long)sp->st_incoming_queue_bytes);
+	__db_dl(env, "Number of messages discarded due to incoming queue full",
+	    (u_long)sp->st_incoming_msgs_dropped);
 	__db_dl(env, "Number of existing connections dropped",
 	    (u_long)sp->st_connection_drop);
 	__db_dl(env, "Number of failed new connection attempts",
@@ -156,6 +177,14 @@ __repmgr_print_stats(env, flags)
 	    (u_long)sp->st_elect_threads);
 	__db_dl(env, "Election threads for which space is reserved",
 	    (u_long)sp->st_max_elect_threads);
+	__db_dl(env, "Number of participant sites in replication group",
+	    (u_long)sp->st_site_participants);
+	__db_dl(env, "Total number of sites in replication group",
+	    (u_long)sp->st_site_total);
+	__db_dl(env, "Number of view sites in replication group",
+	    (u_long)sp->st_site_views);
+	__db_dl(env, "Number of automatic replication process takeovers",
+	    (u_long)sp->st_takeovers);
 
 	__os_ufree(env, sp);
 
@@ -171,7 +200,7 @@ __repmgr_print_sites(env)
 	u_int count, i;
 	int ret;
 
-	if ((ret = __repmgr_site_list(env->dbenv, &count, &list)) != 0)
+	if ((ret = __repmgr_site_list_int(env, &count, &list)) != 0)
 		return (ret);
 
 	if (count == 0)
@@ -189,6 +218,9 @@ __repmgr_print_sites(env)
 			    list[i].status == DB_REPMGR_CONNECTED ? "" : "dis");
 		__db_msgadd(env, &mb, ", %speer",
 		    F_ISSET(&list[i], DB_REPMGR_ISPEER) ? "" : "non-");
+		__db_msgadd(env, &mb, ", %s",
+		    F_ISSET(&list[i], DB_REPMGR_ISVIEW) ?
+		    "view" : "participant");
 		__db_msgadd(env, &mb, ")");
 		DB_MSGBUF_FLUSH(env, &mb);
 	}
@@ -238,26 +270,46 @@ __repmgr_stat_print_pp(dbenv, flags)
 #endif
 
 /*
- * PUBLIC: int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+ * PUBLIC: int __repmgr_site_list_pp
+ * PUBLIC:	__P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
  */
 int
-__repmgr_site_list(dbenv, countp, listp)
+__repmgr_site_list_pp(dbenv, countp, listp)
 	DB_ENV *dbenv;
 	u_int *countp;
 	DB_REPMGR_SITE **listp;
 {
-	DB_REP *db_rep;
-	REP *rep;
-	DB_REPMGR_SITE *status;
 	ENV *env;
 	DB_THREAD_INFO *ip;
+	int ret;
+
+	env = dbenv->env;
+
+	ENV_ENTER(env, ip);
+	ret = __repmgr_site_list_int(env, countp, listp);
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __repmgr_site_list_int __P((ENV *, u_int *, DB_REPMGR_SITE **));
+ */
+int
+__repmgr_site_list_int(env, countp, listp)
+	ENV *env;
+	u_int *countp;
+	DB_REPMGR_SITE **listp;
+{
+	DB_REP *db_rep;
+	DB_REPMGR_SITE *status;
+	REP *rep;
 	REPMGR_SITE *site;
 	size_t array_size, total_size;
 	int eid, locked, ret;
 	u_int count, i;
 	char *name;
 
-	env = dbenv->env;
 	db_rep = env->rep_handle;
 	ret = 0;
 
@@ -269,10 +321,8 @@ __repmgr_site_list(dbenv, countp, listp)
 		LOCK_MUTEX(db_rep->mutex);
 		locked = TRUE;
 
-		ENV_ENTER(env, ip);
 		if (rep->siteinfo_seq > db_rep->siteinfo_seq)
 			ret = __repmgr_sync_siteaddr(env);
-		ENV_LEAVE(env, ip);
 		if (ret != 0)
 			goto err;
 	} else {
@@ -329,6 +379,8 @@ __repmgr_site_list(dbenv, countp, listp)
 
 		if (FLD_ISSET(site->config, DB_REPMGR_PEER))
 			F_SET(&status[i], DB_REPMGR_ISPEER);
+		if (FLD_ISSET(site->gmdb_flags, SITE_VIEW))
+			F_SET(&status[i], DB_REPMGR_ISVIEW);
 
 		/*
 		 * If we haven't started a communications thread, connection
diff --git a/src/repmgr/repmgr_stub.c b/src/repmgr/repmgr_stub.c
index 734c2240..999b759f 100644
--- a/src/repmgr/repmgr_stub.c
+++ b/src/repmgr/repmgr_stub.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -73,6 +73,69 @@ __repmgr_set_ack_policy(dbenv, policy)
 
 /*
  * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
+ * PUBLIC: int __repmgr_get_incoming_queue_max __P((DB_ENV *, u_int32_t *,
+ * PUBLIC:     u_int32_t *));
+ * PUBLIC: #endif
+ */
+int
+__repmgr_get_incoming_queue_max(dbenv, messagesp, bulk_messagesp)
+	DB_ENV *dbenv;
+	u_int32_t *messagesp;
+	u_int32_t *bulk_messagesp;
+{
+	COMPQUIET(messagesp, NULL);
+	COMPQUIET(bulk_messagesp, NULL);
+	return (__db_norepmgr(dbenv));
+}
+
+/*
+ * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
+ * PUBLIC: int __repmgr_set_incoming_queue_max __P((DB_ENV *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ * PUBLIC: #endif
+ */
+int
+__repmgr_set_incoming_queue_max(dbenv, messages, bulk_messages)
+	DB_ENV *dbenv;
+	u_int32_t messages;
+	u_int32_t bulk_messages;
+{
+	COMPQUIET(messages, 0);
+	COMPQUIET(bulk_messages, 0);
+	return (__db_norepmgr(dbenv));
+}
+
+/*
+ * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
+ * PUBLIC: int __repmgr_get_incoming_queue_redzone __P((DB_ENV *,
+ * PUBLIC:     u_int32_t *, u_int32_t *));
+ * PUBLIC: #endif
+ */
+int __repmgr_get_incoming_queue_redzone(dbenv, gbytesp, bytesp)
+	DB_ENV *dbenv;
+	u_int32_t *gbytesp, *bytesp;
+{
+	COMPQUIET(gbytesp, NULL);
+	COMPQUIET(bytesp, NULL);
+	return (__db_norepmgr(dbenv));
+}
+
+/*
+ * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
+ * PUBLIC: int __repmgr_get_incoming_queue_fullevent __P((DB_ENV *,
+ * PUBLIC:     int *));
+ * PUBLIC: #endif
+ */
+int __repmgr_get_incoming_queue_fullevent(dbenv, onoffp)
+	DB_ENV *dbenv;
+	int *onoffp;
+{
+	COMPQUIET(onoffp, NULL);
+	return (__db_norepmgr(dbenv));
+}
+
+/*
+ * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
  * PUBLIC: int __repmgr_site
  * PUBLIC:     __P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t));
  * PUBLIC: #endif
@@ -125,11 +188,12 @@ __repmgr_local_site(dbenv, dbsitep)
 
 /*
  * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
- * PUBLIC: int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+ * PUBLIC: int __repmgr_site_list_pp
+ * PUBLIC:	__P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
  * PUBLIC: #endif
  */
 int
-__repmgr_site_list(dbenv, countp, listp)
+__repmgr_site_list_pp(dbenv, countp, listp)
 	DB_ENV *dbenv;
 	u_int *countp;
 	DB_REPMGR_SITE **listp;
@@ -141,11 +205,11 @@ __repmgr_site_list(dbenv, countp, listp)
 
 /*
  * PUBLIC: #ifndef HAVE_REPLICATION_THREADS
- * PUBLIC: int __repmgr_start __P((DB_ENV *, int, u_int32_t));
+ * PUBLIC: int __repmgr_start_pp __P((DB_ENV *, int, u_int32_t));
  * PUBLIC: #endif
  */
 int
-__repmgr_start(dbenv, nthreads, flags)
+__repmgr_start_pp(dbenv, nthreads, flags)
 	DB_ENV *dbenv;
 	int nthreads;
 	u_int32_t flags;
diff --git a/src/repmgr/repmgr_util.c b/src/repmgr/repmgr_util.c
index c2439436..1c5ebe59 100644
--- a/src/repmgr/repmgr_util.c
+++ b/src/repmgr/repmgr_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -15,9 +15,13 @@
 
 #define	INITIAL_SITES_ALLOCATION	3	     /* Arbitrary guess. */
 
+static int convert_gmdb(ENV *, DB_THREAD_INFO *, DB *, DB_TXN *);
 static int get_eid __P((ENV *, const char *, u_int, int *));
-static int __repmgr_addrcmp __P((repmgr_netaddr_t *, repmgr_netaddr_t *));
 static int read_gmdb __P((ENV *, DB_THREAD_INFO *, u_int8_t **, size_t *));
+static int __repmgr_addrcmp __P((repmgr_netaddr_t *, repmgr_netaddr_t *));
+static int __repmgr_find_commit __P((ENV *, DB_LSN *, DB_LSN *, int *));
+static int __repmgr_remote_lsnhist(ENV *, int, u_int32_t,
+    __repmgr_lsnhist_match_args *);
 
 /*
  * Schedules a future attempt to re-establish a connection with the given site.
@@ -43,6 +47,8 @@ __repmgr_schedule_connection_attempt(env, eid, immediate)
 	REP *rep;
 	REPMGR_RETRY *retry, *target;
 	REPMGR_SITE *site;
+	SITEINFO *sites;
+	db_timeout_t timeout;
 	db_timespec t;
 	int ret;
 
@@ -57,7 +63,24 @@ __repmgr_schedule_connection_attempt(env, eid, immediate)
 	if (immediate)
 		TAILQ_INSERT_HEAD(&db_rep->retries, retry, entries);
 	else {
-		TIMESPEC_ADD_DB_TIMEOUT(&t, rep->connection_retry_wait);
+		/*
+		 * Normally we retry a connection after the connection retry
+		 * timeout.  In a subordinate rep-aware process, we retry sooner
+		 * when there is a listener candidate on the disconnected site.
+		 * The listener process will be connected from the new listener,
+		 * but a subordinate rep-aware process can only wait for retry.
+		 * It matters when the subordinate process becomes listener and
+		 * the disconnected site is master.  The m_listener_wait is set
+		 * to retry after enough time has passed for a takeover.  The
+		 * number of listener candidates is maintained in the listener
+		 * process as it has connections to all subordinate processes
+		 * from other sites.
+		 */
+		timeout = rep->connection_retry_wait;
+		CHECK_LISTENER_CAND(timeout, >0, db_rep->m_listener_wait,
+		    timeout);
+		TIMESPEC_ADD_DB_TIMEOUT(&t, timeout);
+
 		/*
 		 * Insert the new "retry" on the (time-ordered) list in its
 		 * proper position.  To do so, find the list entry ("target")
@@ -284,6 +307,7 @@ __repmgr_new_site(env, sitep, host, port)
 	site->net_addr.host = p;
 	site->net_addr.port = (u_int16_t)port;
 
+	site->max_ack_gen = 0;
 	ZERO_LSN(site->max_ack);
 	site->ack_policy = 0;
 	site->alignment = 0;
@@ -295,6 +319,7 @@ __repmgr_new_site(env, sitep, host, port)
 	site->state = SITE_IDLE;
 
 	site->membership = 0;
+	site->gmdb_flags = 0;
 	site->config = 0;
 
 	*sitep = site;
@@ -535,11 +560,14 @@ __repmgr_thread_failure(env, why)
 	int why;
 {
 	DB_REP *db_rep;
+	DB_THREAD_INFO *ip;
 
 	db_rep = env->rep_handle;
+	ENV_ENTER(env, ip);
 	LOCK_MUTEX(db_rep->mutex);
 	(void)__repmgr_stop_threads(env);
 	UNLOCK_MUTEX(db_rep->mutex);
+	ENV_LEAVE(env, ip);
 	return (__env_panic(env, why));
 }
 
@@ -597,12 +625,13 @@ __repmgr_format_addr_loc(addr, buffer)
 }
 
 /*
- * PUBLIC: int __repmgr_repstart __P((ENV *, u_int32_t));
+ * PUBLIC: int __repmgr_repstart __P((ENV *, u_int32_t, u_int32_t));
  */
 int
-__repmgr_repstart(env, flags)
+__repmgr_repstart(env, flags, startopts)
 	ENV *env;
 	u_int32_t flags;
+	u_int32_t startopts;
 {
 	DBT my_addr;
 	int ret;
@@ -610,7 +639,11 @@ __repmgr_repstart(env, flags)
 	/* Include "cdata" in case sending to old-version site. */
 	if ((ret = __repmgr_prepare_my_addr(env, &my_addr)) != 0)
 		return (ret);
-	ret = __rep_start_int(env, &my_addr, flags);
+	/*
+	 * force_role_chg and hold_client_gen are used by preferred master
+	 * mode to help control site startup.
+	 */
+	ret = __rep_start_int(env, &my_addr, flags, startopts);
 	__os_free(env, my_addr.data);
 	if (ret != 0)
 		__db_err(env, ret, DB_STR("3673", "rep_start"));
@@ -618,11 +651,12 @@ __repmgr_repstart(env, flags)
 }
 
 /*
- * PUBLIC: int __repmgr_become_master __P((ENV *));
+ * PUBLIC: int __repmgr_become_master __P((ENV *, u_int32_t));
  */
 int
-__repmgr_become_master(env)
+__repmgr_become_master(env, startopts)
 	ENV *env;
+	u_int32_t startopts;
 {
 	DB_REP *db_rep;
 	DB_THREAD_INFO *ip;
@@ -631,7 +665,7 @@ __repmgr_become_master(env)
 	REPMGR_SITE *site;
 	DBT key_dbt, data_dbt;
 	__repmgr_membership_key_args key;
-	__repmgr_membership_data_args member_status;
+	__repmgr_membership_data_args member_data;
 	repmgr_netaddr_t addr;
 	u_int32_t status;
 	u_int8_t data_buf[__REPMGR_MEMBERSHIP_DATA_SIZE];
@@ -668,16 +702,23 @@ __repmgr_become_master(env)
 	db_rep->client_intent = FALSE;
 	UNLOCK_MUTEX(db_rep->mutex);
 
-	if ((ret = __repmgr_repstart(env, DB_REP_MASTER)) != 0)
+	if ((ret = __repmgr_repstart(env, DB_REP_MASTER, startopts)) != 0)
 		return (ret);
 
+	/*
+	 * Make sure member_version_gen is current so that this master
+	 * can reject obsolete member lists from other sites.
+	 */
+	db_rep->member_version_gen = db_rep->region->gen;
+
+	/* If there is already a gmdb, we are finished. */
 	if (db_rep->have_gmdb)
 		return (0);
 
-	db_rep->member_version_gen = db_rep->region->gen;
-	ENV_ENTER(env, ip);
+	/* There isn't a gmdb.  Create one from the in-memory site list. */
 	if ((ret = __repmgr_hold_master_role(env, NULL)) != 0)
 		goto leave;
+	ENV_GET_THREAD_INFO(env, ip);
 retry:
 	if ((ret = __repmgr_setup_gmdb_op(env, ip, &txn, DB_CREATE)) != 0)
 		goto err;
@@ -705,8 +746,9 @@ retry:
 		    &key, key_buf, sizeof(key_buf), &len);
 		DB_ASSERT(env, ret == 0);
 		DB_INIT_DBT(key_dbt, key_buf, len);
-		member_status.flags = status;
-		__repmgr_membership_data_marshal(env, &member_status, data_buf);
+		member_data.status = status;
+		member_data.flags = site->gmdb_flags;
+		__repmgr_membership_data_marshal(env, &member_data, data_buf);
 		DB_INIT_DBT(data_dbt, data_buf, __REPMGR_MEMBERSHIP_DATA_SIZE);
 		if ((ret = __db_put(dbp, ip, txn, &key_dbt, &data_dbt, 0)) != 0)
 			goto err;
@@ -726,7 +768,6 @@ err:
 	if ((t_ret = __repmgr_rlse_master_role(env)) != 0 && ret == 0)
 		ret = t_ret;
 leave:
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -840,6 +881,14 @@ __repmgr_open(env, rep_)
 	rep->election_retry_wait = db_rep->election_retry_wait;
 	rep->heartbeat_monitor_timeout = db_rep->heartbeat_monitor_timeout;
 	rep->heartbeat_frequency = db_rep->heartbeat_frequency;
+	rep->inqueue_max_gbytes = db_rep->inqueue_max_gbytes;
+	rep->inqueue_max_bytes = db_rep->inqueue_max_bytes;
+	if (rep->inqueue_max_gbytes == 0 && rep->inqueue_max_bytes == 0) {
+		rep->inqueue_max_bytes = DB_REPMGR_DEFAULT_INQUEUE_MAX;
+	}
+	__repmgr_set_incoming_queue_redzone(rep, rep->inqueue_max_gbytes,
+	    rep->inqueue_max_bytes);
+
 	return (ret);
 }
 
@@ -958,6 +1007,18 @@ __repmgr_join(env, rep_)
 	}
 
 	db_rep->siteinfo_seq = rep->siteinfo_seq;
+	/*
+	 * Adopt our incoming queue limit if it is set and differs from rep's.
+	 */
+	if ((db_rep->inqueue_max_gbytes != 0 ||
+	    db_rep->inqueue_max_bytes != 0) &&
+	    (db_rep->inqueue_max_gbytes != rep->inqueue_max_gbytes ||
+	     db_rep->inqueue_max_bytes != rep->inqueue_max_bytes)) {
+		rep->inqueue_max_gbytes = db_rep->inqueue_max_gbytes;
+		rep->inqueue_max_bytes = db_rep->inqueue_max_bytes;
+		__repmgr_set_incoming_queue_redzone(rep,
+		    rep->inqueue_max_gbytes, rep->inqueue_max_bytes);
+	}
 unlock:
 	MUTEX_UNLOCK(env, rep->mtx_repmgr);
 	return (ret);
@@ -1073,6 +1134,7 @@ __repmgr_share_netaddrs(env, rep_, start, limit)
 		shared_array[eid].addr.port = db_rep->sites[i].net_addr.port;
 		shared_array[eid].config = db_rep->sites[i].config;
 		shared_array[eid].status = db_rep->sites[i].membership;
+		shared_array[eid].flags = db_rep->sites[i].gmdb_flags;
 		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 		    "EID %d is assigned for site %s:%lu",
 			eid, host, (u_long)shared_array[eid].addr.port));
@@ -1134,6 +1196,7 @@ __repmgr_copy_in_added_sites(env)
 		site = SITE_FROM_EID(i);
 		site->config = p->config;
 		site->membership = p->status;
+		site->gmdb_flags = p->flags;
 	}
 
 out:
@@ -1266,7 +1329,9 @@ __repmgr_stable_lsn(env, stable_lsn)
 	db_rep = env->rep_handle;
 	rep = db_rep->region;
 
-	if (rep->min_log_file != 0 && rep->min_log_file < stable_lsn->file) {
+	LOCK_MUTEX(db_rep->mutex);
+	if (rep->sites_avail != 0 && rep->min_log_file != 0 &&
+	    rep->min_log_file < stable_lsn->file) {
 		/*
 		 * Returning an LSN to be consistent with the rest of the
 		 * log archiving processing.  Construct LSN of format
@@ -1276,12 +1341,91 @@ __repmgr_stable_lsn(env, stable_lsn)
 		stable_lsn->offset = 0;
 	}
 	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
-	    "Repmgr_stable_lsn: Returning stable_lsn[%lu][%lu]",
-	    (u_long)stable_lsn->file, (u_long)stable_lsn->offset));
+"Repmgr_stable_lsn: Returning stable_lsn[%lu][%lu] sites_avail %lu min_log %lu",
+	    (u_long)stable_lsn->file, (u_long)stable_lsn->offset,
+	    (u_long)rep->sites_avail, (u_long)rep->min_log_file));
+	UNLOCK_MUTEX(db_rep->mutex);
 	return (0);
 }
 
 /*
+ * PUBLIC: int __repmgr_make_request_conn __P((ENV *,
+ * PUBLIC:     repmgr_netaddr_t *, REPMGR_CONNECTION **));
+ */
+int
+__repmgr_make_request_conn(env, addr, connp)
+	ENV *env;
+	repmgr_netaddr_t *addr;		/* Address handed to __repmgr_connect. */
+	REPMGR_CONNECTION **connp;	/* OUT: stored only when 0 is returned. */
+{
+	DBT vi;
+	__repmgr_msg_hdr_args msg_hdr;
+	__repmgr_version_confirmation_args conf;
+	REPMGR_CONNECTION *conn;
+	int alloc, ret, unused;		/* alloc: handshake buffers need freeing. */
+
+	alloc = FALSE;
+	if ((ret = __repmgr_connect(env, addr, &conn, &unused)) != 0)
+		return (ret);
+	conn->type = APP_CONNECTION;
+
+	/* Read a handshake msg, to get version confirmation and parameters. */
+	if ((ret = __repmgr_read_conn(conn)) != 0)
+		goto err;
+	/*
+	 * We can only get here after having read the full 9 bytes that we
+	 * expect, so this can't fail.
+	 */
+	DB_ASSERT(env, conn->reading_phase == SIZES_PHASE);
+	ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr,
+	    conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL);
+	DB_ASSERT(env, ret == 0);
+	__repmgr_iovec_init(&conn->iovecs);
+	conn->reading_phase = DATA_PHASE;
+
+	if ((ret = __repmgr_prepare_simple_input(env, conn, &msg_hdr)) != 0)
+		goto err;
+	alloc = TRUE;
+
+	if ((ret = __repmgr_read_conn(conn)) != 0)
+		goto err;
+
+	/*
+	 * Analyze the handshake msg, and stash relevant info.
+	 */
+	if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0)
+		goto err;
+	DB_ASSERT(env, vi.size > 0);
+	if ((ret = __repmgr_version_confirmation_unmarshal(env,
+	    &conf, vi.data, vi.size, NULL)) != 0)
+		goto err;
+
+	if (conf.version < GM_MIN_VERSION ||
+	    (IS_VIEW_SITE(env) && conf.version < VIEW_MIN_VERSION) ||
+	    (PREFMAS_IS_SET(env) && conf.version < PREFMAS_MIN_VERSION)) {
+		ret = DB_REP_UNAVAIL;	/* Peer version below a required minimum. */
+		goto err;
+	}
+	conn->version = conf.version;
+
+err:	/* The success path falls through here as well. */
+	if (alloc) {
+		DB_ASSERT(env, conn->input.repmgr_msg.cntrl.size > 0);
+		__os_free(env, conn->input.repmgr_msg.cntrl.data);
+		DB_ASSERT(env, conn->input.repmgr_msg.rec.size > 0);
+		__os_free(env, conn->input.repmgr_msg.rec.data);
+	}
+	__repmgr_reset_for_reading(conn);
+	if (ret == 0)
+		*connp = conn;
+	else {
+		/* Cleanup results are ignored; the original error is returned. */
+		(void)__repmgr_close_connection(env, conn);
+		(void)__repmgr_destroy_conn(env, conn);
+	}
+	return (ret);
+}
+
+/*
  * PUBLIC: int __repmgr_send_sync_msg __P((ENV *, REPMGR_CONNECTION *,
  * PUBLIC:     u_int32_t, u_int8_t *, u_int32_t));
  */
@@ -1311,15 +1455,511 @@ __repmgr_send_sync_msg(env, conn, type, buf, len)
 }
 
 /*
+ * Reads one whole message; fails unless it is a REPMGR_OWN_MSG.
+ */
+/*
+ * PUBLIC: int __repmgr_read_own_msg __P((ENV *, REPMGR_CONNECTION *,
+ * PUBLIC:     u_int32_t *, u_int8_t **, size_t *));
+ */
+int
+__repmgr_read_own_msg(env, conn, typep, bufp, lenp)
+	ENV *env;
+	REPMGR_CONNECTION *conn;
+	u_int32_t *typep;	/* OUT: own-message type; set on success only. */
+	u_int8_t **bufp;	/* OUT: allocated payload; untouched if empty. */
+	size_t *lenp;		/* OUT: payload size; set on success only. */
+{
+	__repmgr_msg_hdr_args msg_hdr;
+	u_int8_t *buf;
+	u_int32_t type;
+	size_t size;
+	int ret;
+
+	__repmgr_reset_for_reading(conn);
+	if ((ret = __repmgr_read_conn(conn)) != 0)
+		goto err;
+	ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr,
+	    conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL);
+	DB_ASSERT(env, ret == 0);
+
+	if ((conn->msg_type = msg_hdr.type) != REPMGR_OWN_MSG) {
+		ret = DB_REP_UNAVAIL; /* Protocol violation. */
+		goto err;
+	}
+	type = REPMGR_OWN_MSG_TYPE(msg_hdr);
+	if ((size = (size_t)REPMGR_OWN_BUF_SIZE(msg_hdr)) > 0) {
+		conn->reading_phase = DATA_PHASE;
+		__repmgr_iovec_init(&conn->iovecs);
+
+		if ((ret = __os_malloc(env, size, &buf)) != 0)
+			goto err;
+		conn->input.rep_message = NULL;
+
+		__repmgr_add_buffer(&conn->iovecs, buf, size);
+		if ((ret = __repmgr_read_conn(conn)) != 0) {
+			__os_free(env, buf);
+			goto err;
+		}
+		*bufp = buf;	/* Caller now owns buf and must free it. */
+	}
+
+	*typep = type;
+	*lenp = size;
+
+err:
+	return (ret);
+}
+
+/*
+ * Reports whether this site currently has a connection to the other site
+ * of a preferred master replication group: TRUE if so, FALSE otherwise.
+ *
+ * PUBLIC: int __repmgr_prefmas_connected __P((ENV *));
+ */
+int
+__repmgr_prefmas_connected(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REPMGR_CONNECTION *peer_conn;
+	REPMGR_SITE *peer;
+	int connected;
+
+	db_rep = env->rep_handle;
+	connected = FALSE;
+
+	/*
+	 * A preferred master group has exactly 2 sites, which makes the
+	 * remote site EID 1 in every case.
+	 */
+	if (IS_PREFMAS_MODE(env) && IS_KNOWN_REMOTE_SITE(1)) {
+		peer = SITE_FROM_EID(1);
+		if (peer->state == SITE_CONNECTED)
+			connected = TRUE;
+		else if ((peer_conn = peer->ref.conn.in) != NULL &&
+		    IS_READY_STATE(peer_conn->state))
+			connected = TRUE;
+		else if ((peer_conn = peer->ref.conn.out) != NULL &&
+		    IS_READY_STATE(peer_conn->state))
+			connected = TRUE;
+	}
+
+	return (connected);
+}
+
+/*
+ * Used by a preferred master site to restart the remote temporary master
+ * site as a client, which helps guarantee that the preferred master site's
+ * transactions are never rolled back.  No-op unless in preferred master mode.
+ *
+ * PUBLIC: int __repmgr_restart_site_as_client __P((ENV *, int));
+ */
+int
+__repmgr_restart_site_as_client(env, eid)
+	ENV *env;
+	int eid;
+{
+	DB_REP *db_rep;
+	REPMGR_CONNECTION *conn;
+	repmgr_netaddr_t addr;
+	u_int32_t type;
+	size_t len;
+	u_int8_t any_value, *response_buf;
+	int ret, t_ret;
+
+	COMPQUIET(any_value, 0);
+	db_rep = env->rep_handle;
+	conn = NULL;
+	response_buf = NULL;
+
+	if (!IS_PREFMAS_MODE(env))
+		return (0);
+
+	LOCK_MUTEX(db_rep->mutex);
+	addr = SITE_FROM_EID(eid)->net_addr;
+	UNLOCK_MUTEX(db_rep->mutex);
+	if ((ret = __repmgr_make_request_conn(env, &addr, &conn)) != 0)
+		return (ret);
+
+	/* No payload needed; a dummy byte lets the peer see a message. */
+	if ((ret = __repmgr_send_sync_msg(env, conn,
+	    REPMGR_RESTART_CLIENT, VOID_STAR_CAST &any_value, 1)) != 0)
+		goto err;
+
+	if ((ret = __repmgr_read_own_msg(env,
+	    conn, &type, &response_buf, &len)) != 0)
+		goto err;
+	if (type != REPMGR_PREFMAS_SUCCESS) {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "restart_site_as_client got unexpected message type %d",
+		    type));
+		ret = DB_REP_UNAVAIL; /* Invalid response: protocol violation */
+	}
+err:	/* Keep the first error; cleanup may only report one on success. */
+	if (conn != NULL) {
+		if ((t_ret = __repmgr_close_connection(env,
+		    conn)) != 0 && ret == 0)
+			ret = t_ret;
+		if ((t_ret = __repmgr_destroy_conn(env,
+		    conn)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+	if (response_buf != NULL)	/* Reply payload is ours to free. */
+		__os_free(env, response_buf);
+	return (ret);
+}
+
+/*
+ * Used by a preferred master site to make the remote temporary master
+ * site a readonly master.  This is used to help preserve all temporary
+ * master transactions.
+ *
+ * PUBLIC: int __repmgr_make_site_readonly_master __P((ENV *, int,
+ * PUBLIC:     u_int32_t *, DB_LSN *));
+ */
+int
+__repmgr_make_site_readonly_master(env, eid, gen, sync_lsnp)
+	ENV *env;
+	int eid;
+	u_int32_t *gen;		/* OUT: generation from the reply, else 0. */
+	DB_LSN *sync_lsnp;	/* OUT: LSN from the reply, else zero LSN. */
+{
+	DB_REP *db_rep;
+	REPMGR_CONNECTION *conn;
+	repmgr_netaddr_t addr;
+	__repmgr_permlsn_args permlsn;
+	u_int32_t type;
+	size_t len;
+	u_int8_t any_value, *response_buf;
+	int ret, t_ret;
+
+	COMPQUIET(any_value, 0);
+	db_rep = env->rep_handle;
+	conn = NULL;
+	response_buf = NULL;
+	*gen = 0;
+	ZERO_LSN(*sync_lsnp);
+
+	if (!IS_PREFMAS_MODE(env))
+		return (0);
+
+	LOCK_MUTEX(db_rep->mutex);
+	addr = SITE_FROM_EID(eid)->net_addr;
+	UNLOCK_MUTEX(db_rep->mutex);
+	if ((ret = __repmgr_make_request_conn(env, &addr, &conn)) != 0)
+		return (ret);
+
+	/*
+	 * No payload needed, but must send at least a dummy byte for the
+	 * other side to recognize that a message has arrived.
+	 */
+	if ((ret = __repmgr_send_sync_msg(env, conn,
+	    REPMGR_READONLY_MASTER, VOID_STAR_CAST &any_value, 1)) != 0)
+		goto err;
+
+	if ((ret = __repmgr_read_own_msg(env,
+	    conn, &type, &response_buf, &len)) != 0)
+		goto err;
+
+	if (type == REPMGR_READONLY_RESPONSE) {
+		if ((ret = __repmgr_permlsn_unmarshal(env,
+		    &permlsn, response_buf, len, NULL)) != 0)
+			goto err;
+		*gen = permlsn.generation;
+		*sync_lsnp = permlsn.lsn;
+	} else {
+		RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+		    "make_site_readonly_master got unexpected message type %d",
+		    type));
+		ret = DB_REP_UNAVAIL; /* Invalid response: protocol violation */
+	}
+
+err:	/* Keep the first error; cleanup may only report one on success. */
+	if (conn != NULL) {
+		if ((t_ret = __repmgr_close_connection(env,
+		    conn)) != 0 && ret == 0)
+			ret = t_ret;
+		if ((t_ret = __repmgr_destroy_conn(env,
+		    conn)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+	if (response_buf != NULL)
+		__os_free(env, response_buf);
+	return (ret);
+}
+
+/*
+ * Used by a preferred master site to perform the LSN history comparisons to
+ * determine whether there are continuous or conflicting sets of
+ * transactions between this site and the remote temporary master.
+ *
+ * PUBLIC: int __repmgr_lsnhist_match __P((ENV *,
+ * PUBLIC:     DB_THREAD_INFO *, int, int *));
+ */
+int
+__repmgr_lsnhist_match(env, ip, eid, match)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	int eid;
+	int *match;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	__rep_lsn_hist_data_args my_lsnhist;
+	__repmgr_lsnhist_match_args remote_lsnhist;
+	u_int32_t my_gen;
+	int found_commit, ret;
+
+	db_rep = env->rep_handle;
+	rep = db_rep->region;
+	*match = FALSE;
+	my_gen = rep->gen;
+	found_commit = FALSE;
+
+	if (!IS_PREFMAS_MODE(env))
+		return (0);
+
+	/* Get local LSN history information for comparison. */
+	if ((ret = __rep_get_lsnhist_data(env, ip, my_gen, &my_lsnhist)) != 0)
+		return (ret);
+
+	/* Get remote LSN history information for comparison. */
+	memset(&remote_lsnhist, 0, sizeof(remote_lsnhist));
+	ret = __repmgr_remote_lsnhist(env, eid, my_gen, &remote_lsnhist);
+
+	/*
+	 * If the remote site could not supply the current gen, the match fails.
+	 *
+	 * If the remote LSN or timestamp at the current gen doesn't match
+	 * ours, we probably had a whack-a-mole situation where each site
+	 * was up and down in isolation one or more times and the match fails.
+	 *
+	 * If the remote LSN for the next generation is lower than this
+	 * site's startup LSN and there are any commit operations between
+	 * these LSNs, there are conflicting sets of transactions and the
+	 * match fails.
+	 */
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "lsnhist_match my_lsn [%lu][%lu] remote_lsn [%lu][%lu]",
+	    (u_long)my_lsnhist.lsn.file, (u_long)my_lsnhist.lsn.offset,
+	    (u_long)remote_lsnhist.lsn.file,
+	    (u_long)remote_lsnhist.lsn.offset));
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "lsnhist_match my_time %lu:%lu remote_time %lu:%lu",
+	    (u_long)my_lsnhist.hist_sec, (u_long)my_lsnhist.hist_nsec,
+	    (u_long)remote_lsnhist.hist_sec, (u_long)remote_lsnhist.hist_nsec));
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "lsnhist_match pminit_lsn [%lu][%lu] next_gen_lsn [%lu][%lu]",
+	    (u_long)db_rep->prefmas_init_lsn.file,
+	    (u_long)db_rep->prefmas_init_lsn.offset,
+	    (u_long)remote_lsnhist.next_gen_lsn.file,
+	    (u_long)remote_lsnhist.next_gen_lsn.offset));
+	if (ret == 0 &&
+	    LOG_COMPARE(&my_lsnhist.lsn, &remote_lsnhist.lsn) == 0 &&
+	    my_lsnhist.hist_sec == remote_lsnhist.hist_sec &&
+	    my_lsnhist.hist_nsec == remote_lsnhist.hist_nsec) {
+		/*
+		 * If the remote site doesn't yet have the next gen or if
+		 * our startup LSN is <= than the remote next gen LSN, we
+		 * have a match.
+		 *
+		 * Otherwise, our startup LSN is higher than the remote
+		 * next gen LSN.  If we have any commit operations between
+		 * these two LSNs, we have preferred master operations we
+		 * must preserve and there is not a match.  But if we just
+		 * have uncommitted operations between these LSNs it doesn't
+		 * matter if they are rolled back, so we call it a match and
+		 * try to retain temporary master transactions if possible.
+		 */
+		if (IS_ZERO_LSN(remote_lsnhist.next_gen_lsn) ||
+		    LOG_COMPARE(&db_rep->prefmas_init_lsn,
+		    &remote_lsnhist.next_gen_lsn) <= 0)
+			*match = TRUE;
+		else if ((ret = __repmgr_find_commit(env,
+		    &remote_lsnhist.next_gen_lsn,
+		    &db_rep->prefmas_init_lsn, &found_commit)) == 0 &&
+		    !found_commit) {
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "lsnhist_match !found_commit set match TRUE"));
+			*match = TRUE;
+		}
+	}
+
+	/* Don't return an error if current gen didn't exist at remote site. */
+	if (ret == DB_REP_UNAVAIL)
+		ret = 0;
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "lsnhist_match match %d returning %d", *match, ret));
+	return (ret);
+}
+
+/*
+ * Checks the log records from low_lsn through high_lsn for any commit
+ * operation, setting found_commit to TRUE if one is found.  Reaching the
+ * end of the log before high_lsn is not an error.
+ */
+static int
+__repmgr_find_commit(env, low_lsn, high_lsn, found_commit)
+	ENV *env;
+	DB_LSN *low_lsn;
+	DB_LSN *high_lsn;
+	int *found_commit;
+{
+	DB_LOGC *logc;
+	DB_LSN lsn;
+	DBT rec;
+	__txn_regop_args *txn_args;
+	u_int32_t rectype;
+	int ret, t_ret;
+
+	*found_commit = FALSE;
+	lsn = *low_lsn;
+	if ((ret = __log_cursor(env, &logc)) != 0)
+		return (ret);
+	memset(&rec, 0, sizeof(rec));
+	if (__logc_get(logc, &lsn, &rec, DB_SET) == 0) {
+		do {
+			LOGCOPY_32(env, &rectype, rec.data);
+			if (rectype == DB___txn_regop) {
+				if ((ret = __txn_regop_read(
+				    env, rec.data, &txn_args)) != 0)
+					goto close_cursor;
+				if (txn_args->opcode == TXN_COMMIT) {
+					*found_commit = TRUE;
+					__os_free(env, txn_args);
+					break;
+				}
+				__os_free(env, txn_args);
+			}
+		} while ((ret = __logc_get(logc, &lsn, &rec, DB_NEXT)) == 0 &&
+		    LOG_COMPARE(&lsn, high_lsn) <= 0);
+	}
+	if (ret == DB_NOTFOUND)	/* Ran off the end of the log: scan is done. */
+		ret = 0;
+close_cursor:
+	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
+		ret = t_ret;
+	return (ret);
+}
+
+/*
+ * Used by a preferred master site to get remote LSN history information
+ * from the other site.  lsnhist_match is valid only when 0 is returned.
+ */
+static int
+__repmgr_remote_lsnhist(env, eid, gen, lsnhist_match)
+	ENV *env;
+	int eid;
+	u_int32_t gen;
+	__repmgr_lsnhist_match_args *lsnhist_match;
+{
+	DB_REP *db_rep;
+	REPMGR_CONNECTION *conn;
+	repmgr_netaddr_t addr;
+	__rep_lsn_hist_key_args lsnhist_key;
+	u_int8_t lsnhist_key_buf[__REP_LSN_HIST_KEY_SIZE];
+	u_int32_t type;
+	size_t len;
+	u_int8_t *response_buf;
+	int ret, t_ret;
+
+	db_rep = env->rep_handle;
+	conn = NULL;
+	response_buf = NULL;
+
+	if (!IS_KNOWN_REMOTE_SITE(eid))	/* Nobody to ask: nothing filled in. */
+		return (DB_REP_UNAVAIL);
+
+	LOCK_MUTEX(db_rep->mutex);
+	addr = SITE_FROM_EID(eid)->net_addr;
+	UNLOCK_MUTEX(db_rep->mutex);
+	if ((ret = __repmgr_make_request_conn(env, &addr, &conn)) != 0)
+		return (ret);
+
+	/* Marshal generation for which to request remote lsnhist data. */
+	lsnhist_key.version = REP_LSN_HISTORY_FMT_VERSION;
+	lsnhist_key.gen = gen;
+	__rep_lsn_hist_key_marshal(env, &lsnhist_key, lsnhist_key_buf);
+	if ((ret = __repmgr_send_sync_msg(env, conn, REPMGR_LSNHIST_REQUEST,
+	    lsnhist_key_buf, sizeof(lsnhist_key_buf))) != 0)
+		goto err;
+
+	if ((ret = __repmgr_read_own_msg(env,
+	    conn, &type, &response_buf, &len)) != 0)
+		goto err;
+
+	/* Unmarshal remote lsnhist time and LSNs, bounded by bytes received. */
+	if (type == REPMGR_LSNHIST_RESPONSE) {
+		if ((ret = __repmgr_lsnhist_match_unmarshal(env, lsnhist_match,
+		    response_buf, len, NULL)) != 0)
+			goto err;
+	} else {
+		/*
+		 * If the other site sent back REPMGR_PREFMAS_FAILURE, it means
+		 * no lsnhist record for the requested gen was found on other
+		 * site.
+		 */
+		if (type != REPMGR_PREFMAS_FAILURE)
+			RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+			    "remote_lsnhist got unexpected message type %d",
+			    type));
+		ret = DB_REP_UNAVAIL;
+	}
+
+err:	/* Keep the first error; cleanup may only report one on success. */
+	if (conn != NULL) {
+		if ((t_ret = __repmgr_close_connection(env,
+		    conn)) != 0 && ret == 0)
+			ret = t_ret;
+		if ((t_ret = __repmgr_destroy_conn(env,
+		    conn)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+	if (response_buf != NULL)
+		__os_free(env, response_buf);
+	return (ret);
+}
+
+/*
+ * Computes the polling parameters for preferred master waits: how long
+ * to yield the processor each time and how many times to try.  The total
+ * wait is the larger of 2 seconds or 3 * ack_timeout.
+ *
+ * PUBLIC: int __repmgr_prefmas_get_wait __P((ENV *, u_int32_t *, u_long *));
+ */
+int
+__repmgr_prefmas_get_wait(env, tries, yield_usecs)
+	ENV *env;
+	u_int32_t *tries;
+	u_long *yield_usecs;
+{
+	REP *rep;
+	db_timeout_t floor_wait, scaled_wait, total_wait;
+
+	rep = env->rep_handle->region;
+
+	/* Take whichever is longer: the fixed floor or the scaled timeout. */
+	floor_wait = DB_REPMGR_DEFAULT_ACK_TIMEOUT * 2;
+	scaled_wait = rep->ack_timeout * 3;
+	total_wait = scaled_wait > floor_wait ? scaled_wait : floor_wait;
+
+	*yield_usecs = 250000;
+	*tries = total_wait / (u_int32_t)*yield_usecs;
+	return (0);
+}
+
+/*
  * Produce a membership list from the known info currently in memory.
  *
- * PUBLIC: int __repmgr_marshal_member_list __P((ENV *, u_int8_t **, size_t *));
+ * PUBLIC: int __repmgr_marshal_member_list __P((ENV *, u_int32_t,
+ * PUBLIC:     u_int8_t **, size_t *));
  *
  * Caller must hold mutex.
  */
 int
-__repmgr_marshal_member_list(env, bufp, lenp)
+__repmgr_marshal_member_list(env, msg_version, bufp, lenp)
 	ENV *env;
+	u_int32_t msg_version;
 	u_int8_t **bufp;
 	size_t *lenp;
 {
@@ -1328,6 +1968,7 @@ __repmgr_marshal_member_list(env, bufp, lenp)
 	REPMGR_SITE *site;
 	__repmgr_membr_vers_args membr_vers;
 	__repmgr_site_info_args site_info;
+	__repmgr_v4site_info_args v4site_info;
 	u_int8_t *buf, *p;
 	size_t bufsize, len;
 	u_int i;
@@ -1353,14 +1994,24 @@ __repmgr_marshal_member_list(env, bufp, lenp)
 		if (site->membership == 0)
 			continue;
 
-		site_info.host.data = site->net_addr.host;
-		site_info.host.size =
-		    (u_int32_t)strlen(site->net_addr.host) + 1;
-		site_info.port = site->net_addr.port;
-		site_info.flags = site->membership;
-
-		ret = __repmgr_site_info_marshal(env,
-		    &site_info, p, (size_t)(&buf[bufsize]-p), &len);
+		if (msg_version < 5) {
+			v4site_info.host.data = site->net_addr.host;
+			v4site_info.host.size =
+				(u_int32_t)strlen(site->net_addr.host) + 1;
+			v4site_info.port = site->net_addr.port;
+			v4site_info.flags = site->membership;
+			ret = __repmgr_v4site_info_marshal(env,
+			    &v4site_info, p, (size_t)(&buf[bufsize]-p), &len);
+		} else {
+			site_info.host.data = site->net_addr.host;
+			site_info.host.size =
+				(u_int32_t)strlen(site->net_addr.host) + 1;
+			site_info.port = site->net_addr.port;
+			site_info.status = site->membership;
+			site_info.flags = site->gmdb_flags;
+			ret = __repmgr_site_info_marshal(env,
+			    &site_info, p, (size_t)(&buf[bufsize]-p), &len);
+		}
 		DB_ASSERT(env, ret == 0);
 		p += len;
 	}
@@ -1387,7 +2038,7 @@ read_gmdb(env, ip, bufp, lenp)
 	DBC *dbc;
 	DBT key_dbt, data_dbt;
 	__repmgr_membership_key_args key;
-	__repmgr_membership_data_args member_status;
+	__repmgr_membership_data_args member_data;
 	__repmgr_member_metadata_args metadata;
 	__repmgr_membr_vers_args membr_vers;
 	__repmgr_site_info_args site_info;
@@ -1435,8 +2086,13 @@ read_gmdb(env, ip, bufp, lenp)
 	ret = __repmgr_member_metadata_unmarshal(env,
 	    &metadata, metadata_buf, data_dbt.size, NULL);
 	DB_ASSERT(env, ret == 0);
-	DB_ASSERT(env, metadata.format == REPMGR_GMDB_FMT_VERSION);
+	DB_ASSERT(env, metadata.format >= REPMGR_GMDB_FMT_MIN_VERSION &&
+	    metadata.format <= REPMGR_GMDB_FMT_VERSION);
 	DB_ASSERT(env, metadata.version > 0);
+	/* Automatic conversion of old format gmdb if needed. */
+	if (metadata.format < REPMGR_GMDB_FMT_VERSION &&
+	    (ret = convert_gmdb(env, ip, dbp, txn)) != 0)
+		goto err;
 
 	bufsize = 1000;		/* Initial guess. */
 	if ((ret = __os_malloc(env, bufsize, &buf)) != 0)
@@ -1459,13 +2115,14 @@ read_gmdb(env, ip, bufp, lenp)
 		DB_ASSERT(env, key.port > 0);
 
 		ret = __repmgr_membership_data_unmarshal(env,
-		    &member_status, data_buf, data_dbt.size, NULL);
+		    &member_data, data_buf, data_dbt.size, NULL);
 		DB_ASSERT(env, ret == 0);
-		DB_ASSERT(env, member_status.flags != 0);
+		DB_ASSERT(env, member_data.status != 0);
 
 		site_info.host = key.host;
 		site_info.port = key.port;
-		site_info.flags = member_status.flags;
+		site_info.status = member_data.status;
+		site_info.flags = member_data.flags;
 		if ((ret = __repmgr_site_info_marshal(env, &site_info,
 		    p, (size_t)(&buf[bufsize]-p), &len)) == ENOMEM) {
 			bufsize *= 2;
@@ -1501,28 +2158,129 @@ err:
 }
 
 /*
+ * Convert an older-format group membership database into the current format.
+ */
+static int
+convert_gmdb(env, ip, dbp, txn)
+	ENV *env;
+	DB_THREAD_INFO *ip;
+	DB *dbp;		/* Database whose records are rewritten. */
+	DB_TXN *txn;		/* Transaction the cursor is opened under. */
+{
+	DBC *dbc;
+	DBT key_dbt, data_dbt, v4data_dbt;
+	__repmgr_membership_key_args key;
+	__repmgr_membership_data_args member_data;
+	__repmgr_v4membership_data_args v4member_data;
+	__repmgr_member_metadata_args metadata;
+	u_int8_t data_buf[__REPMGR_MEMBERSHIP_DATA_SIZE];
+	u_int8_t key_buf[MAX_MSG_BUF];
+	u_int8_t metadata_buf[__REPMGR_MEMBER_METADATA_SIZE];
+	u_int8_t v4data_buf[__REPMGR_V4MEMBERSHIP_DATA_SIZE];
+	int ret, t_ret;
+
+	dbc = NULL;
+
+	if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0)
+		goto err;
+
+	memset(&key_dbt, 0, sizeof(key_dbt));
+	key_dbt.data = key_buf;
+	key_dbt.ulen = sizeof(key_buf);
+	F_SET(&key_dbt, DB_DBT_USERMEM);
+	memset(&data_dbt, 0, sizeof(data_dbt));
+	data_dbt.data = metadata_buf;
+	data_dbt.ulen = sizeof(metadata_buf);
+	F_SET(&data_dbt, DB_DBT_USERMEM);
+	memset(&v4data_dbt, 0, sizeof(v4data_dbt));
+	v4data_dbt.data = v4data_buf;
+	v4data_dbt.ulen = sizeof(v4data_buf);
+	F_SET(&v4data_dbt, DB_DBT_USERMEM);
+
+	/*
+	 * The first gmdb record is a special metadata record that contains
+	 * an empty key and gmdb metadata (format and version) and has already
+	 * been validated by the caller.  We need to update its format value
+	 * for this conversion but leave the version alone.
+	 */
+	if ((ret = __dbc_get(dbc, &key_dbt, &data_dbt, DB_NEXT)) != 0)
+		goto err;
+	ret = __repmgr_membership_key_unmarshal(env,
+	    &key, key_buf, key_dbt.size, NULL);
+	DB_ASSERT(env, ret == 0);
+	DB_ASSERT(env, key.host.size == 0);
+	DB_ASSERT(env, key.port == 0);
+	ret = __repmgr_member_metadata_unmarshal(env,
+	    &metadata, metadata_buf, data_dbt.size, NULL);
+	DB_ASSERT(env, ret == 0);
+	DB_ASSERT(env, metadata.version > 0);
+	metadata.format = REPMGR_GMDB_FMT_VERSION;
+	__repmgr_member_metadata_marshal(env, &metadata, metadata_buf);
+	DB_INIT_DBT(data_dbt, metadata_buf, __REPMGR_MEMBER_METADATA_SIZE);
+	if ((ret = __dbc_put(dbc, &key_dbt, &data_dbt, DB_CURRENT)) != 0)
+		goto err;
+
+	/*
+	 * The rest of the gmdb records contain a key (host and port) and
+	 * membership data (status and now flags).  But the old format was
+	 * using flags for the status value, so we need to transfer the
+	 * old flags value to status and provide an empty flags value for
+	 * this conversion.
+	 */
+	data_dbt.data = data_buf;
+	data_dbt.ulen = sizeof(data_buf);
+	while ((ret = __dbc_get(dbc, &key_dbt, &v4data_dbt, DB_NEXT)) == 0) {
+		/* Get membership data in old format. */
+		ret = __repmgr_v4membership_data_unmarshal(env,
+		    &v4member_data, v4data_buf, v4data_dbt.size, NULL);
+		DB_ASSERT(env, ret == 0);
+		DB_ASSERT(env, v4member_data.flags != 0);
+
+		/* Convert membership data into current format and update. */
+		member_data.status = v4member_data.flags;
+		member_data.flags = 0;
+		__repmgr_membership_data_marshal(env, &member_data, data_buf);
+		DB_INIT_DBT(data_dbt, data_buf, __REPMGR_MEMBERSHIP_DATA_SIZE);
+		if ((ret = __dbc_put(dbc,
+		    &key_dbt, &data_dbt, DB_CURRENT)) != 0)
+			goto err;
+	}
+	if (ret == DB_NOTFOUND)	/* No more records: conversion is complete. */
+		ret = 0;
+
+err:	/* Close the cursor on every path, keeping the first error. */
+	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
+	return (ret);
+}
+
+/*
  * Refresh our sites array from the given membership list.
  *
  * PUBLIC: int __repmgr_refresh_membership __P((ENV *,
- * PUBLIC:     u_int8_t *, size_t));
+ * PUBLIC:     u_int8_t *, size_t, u_int32_t));
  */
 int
-__repmgr_refresh_membership(env, buf, len)
+__repmgr_refresh_membership(env, buf, len, version)
 	ENV *env;
 	u_int8_t *buf;
 	size_t len;
+	u_int32_t version;
 {
 	DB_REP *db_rep;
+	REP *rep;
 	REPMGR_SITE *site;
 	__repmgr_membr_vers_args membr_vers;
 	__repmgr_site_info_args site_info;
+	__repmgr_v4site_info_args v4site_info;
 	char *host;
 	u_int8_t *p;
 	u_int16_t port;
-	u_int32_t i, n;
+	u_int32_t i, participants;
 	int eid, ret;
 
 	db_rep = env->rep_handle;
+	rep = db_rep->region;
 
 	/*
 	 * Membership list consists of membr_vers followed by a number of
@@ -1546,9 +2304,17 @@ __repmgr_refresh_membership(env, buf, len)
 	for (i = 0; i < db_rep->site_cnt; i++)
 		F_CLR(SITE_FROM_EID(i), SITE_TOUCHED);
 
-	for (n = 0; p < &buf[len]; ++n) {
-		ret = __repmgr_site_info_unmarshal(env,
-		    &site_info, p, (size_t)(&buf[len] - p), &p);
+	for (participants = 0; p < &buf[len]; ) {
+		if (version < 5) {
+			ret = __repmgr_v4site_info_unmarshal(env,
+			    &v4site_info, p, (size_t)(&buf[len] - p), &p);
+			site_info.host = v4site_info.host;
+			site_info.port = v4site_info.port;
+			site_info.status = v4site_info.flags;
+			site_info.flags = 0;
+		} else
+			ret = __repmgr_site_info_unmarshal(env,
+			    &site_info, p, (size_t)(&buf[len] - p), &p);
 		DB_ASSERT(env, ret == 0);
 
 		host = site_info.host.data;
@@ -1556,9 +2322,11 @@ __repmgr_refresh_membership(env, buf, len)
 		    (u_int8_t*)site_info.host.data + site_info.host.size <= p);
 		host[site_info.host.size-1] = '\0';
 		port = site_info.port;
+		if (!FLD_ISSET(site_info.flags, SITE_VIEW))
+			participants++;
 
 		if ((ret = __repmgr_set_membership(env,
-		    host, port, site_info.flags)) != 0)
+		    host, port, site_info.status, site_info.flags)) != 0)
 			goto err;
 
 		if ((ret = __repmgr_find_site(env, host, port, &eid)) != 0)
@@ -1566,8 +2334,13 @@ __repmgr_refresh_membership(env, buf, len)
 		DB_ASSERT(env, IS_VALID_EID(eid));
 		F_SET(SITE_FROM_EID(eid), SITE_TOUCHED);
 	}
-	ret = __rep_set_nsites_int(env, n);
+	ret = __rep_set_nsites_int(env, participants);
 	DB_ASSERT(env, ret == 0);
+	if (FLD_ISSET(rep->config,
+	    REP_C_PREFMAS_MASTER | REP_C_PREFMAS_CLIENT) &&
+	    rep->config_nsites > 2)
+		__db_errx(env, DB_STR("3703",
+	    "More than two sites in preferred master replication group"));
 
 	/* Scan "touched" flags so as to notice sites that have been removed. */
 	for (i = 0; i < db_rep->site_cnt; i++) {
@@ -1576,7 +2349,8 @@ __repmgr_refresh_membership(env, buf, len)
 			continue;
 		host = site->net_addr.host;
 		port = site->net_addr.port;
-		if ((ret = __repmgr_set_membership(env, host, port, 0)) != 0)
+		if ((ret = __repmgr_set_membership(env, host, port,
+		    0, site->gmdb_flags)) != 0)
 			goto err;
 	}
 
@@ -1597,13 +2371,13 @@ __repmgr_reload_gmdb(env)
 	size_t len;
 	int ret;
 
-	ENV_ENTER(env, ip);
+	ENV_GET_THREAD_INFO(env, ip);
 	if ((ret = read_gmdb(env, ip, &buf, &len)) == 0) {
 		env->rep_handle->have_gmdb = TRUE;
-		ret = __repmgr_refresh_membership(env, buf, len);
+		ret = __repmgr_refresh_membership(env, buf, len,
+			DB_REPMGR_VERSION);
 		__os_free(env, buf);
 	}
-	ENV_LEAVE(env, ip);
 	return (ret);
 }
 
@@ -1650,7 +2424,8 @@ __repmgr_init_save(env, dbt)
 		dbt->data = NULL;
 		dbt->size = 0;
 		ret = 0;
-	} else if ((ret = __repmgr_marshal_member_list(env, &buf, &len)) == 0) {
+	} else if ((ret = __repmgr_marshal_member_list(env,
+	    DB_REPMGR_VERSION, &buf, &len)) == 0) {
 		dbt->data = buf;
 		dbt->size = (u_int32_t)len;
 	}
@@ -1700,6 +2475,7 @@ __repmgr_defer_op(env, op)
 	 */
 	if ((ret = __os_calloc(env, 1, sizeof(*msg), &msg)) != 0)
 		return (ret);
+	msg->size = sizeof(*msg);
 	msg->msg_hdr.type = REPMGR_OWN_MSG;
 	REPMGR_OWN_MSG_TYPE(msg->msg_hdr) = op;
 	ret = __repmgr_queue_put(env, msg);
@@ -1771,7 +2547,7 @@ __repmgr_become_client(env)
 	if ((ret = __repmgr_await_gmdbop(env)) == 0)
 		db_rep->client_intent = TRUE;
 	UNLOCK_MUTEX(db_rep->mutex);
-	return (ret == 0 ? __repmgr_repstart(env, DB_REP_CLIENT) : ret);
+	return (ret == 0 ? __repmgr_repstart(env, DB_REP_CLIENT, 0) : ret);
 }
 
 /*
@@ -1897,16 +2673,17 @@ get_eid(env, host, port, eidp)
  * accordingly.
  *
  * PUBLIC: int __repmgr_set_membership __P((ENV *,
- * PUBLIC:     const char *, u_int, u_int32_t));
+ * PUBLIC:     const char *, u_int, u_int32_t, u_int32_t));
  *
  * Caller must host db_rep mutex, and be in ENV_ENTER context.
  */
 int
-__repmgr_set_membership(env, host, port, status)
+__repmgr_set_membership(env, host, port, status, flags)
 	ENV *env;
 	const char *host;
 	u_int port;
 	u_int32_t status;
+	u_int32_t flags;
 {
 	DB_REP *db_rep;
 	REP *rep;
@@ -1953,7 +2730,9 @@ __repmgr_set_membership(env, host, port, status)
 
 		/* Set both private and shared copies of the info. */
 		site->membership = status;
+		site->gmdb_flags = flags;
 		sites[eid].status = status;
+		sites[eid].flags = flags;
 	}
 	MUTEX_UNLOCK(env, rep->mtx_repmgr);
 
@@ -1965,7 +2744,8 @@ __repmgr_set_membership(env, host, port, status)
 	    SELECTOR_RUNNING(db_rep)) {
 
 		if (eid == db_rep->self_eid && status != SITE_PRESENT)
-			ret = DB_DELETED;
+			ret = (status == SITE_ADDING) ?
+			    __repmgr_defer_op(env, REPMGR_REJOIN) : DB_DELETED;
 		else if (orig != SITE_PRESENT && status == SITE_PRESENT &&
 		    site->state == SITE_IDLE) {
 			/*
@@ -1981,10 +2761,11 @@ __repmgr_set_membership(env, host, port, status)
 			 * failure shouldn't hurt anything, because we'll just
 			 * naturally try again later.
 			 */
-			ret = __repmgr_schedule_connection_attempt(env,
-			    eid, TRUE);
-			if (eid != db_rep->self_eid)
+			if (eid != db_rep->self_eid) {
+				ret = __repmgr_schedule_connection_attempt(env,
+				    eid, TRUE);
 				DB_EVENT(env, DB_EVENT_REP_SITE_ADDED, &eid);
+			}
 		} else if (orig != 0 && status == 0)
 			DB_EVENT(env, DB_EVENT_REP_SITE_REMOVED, &eid);
 
@@ -2084,3 +2865,73 @@ __repmgr_bcast_own_msg(env, type, buf, len)
 	}
 	return (0);
 }
+
+/*
+ * PUBLIC: int __repmgr_bcast_member_list __P((ENV *));
+ *
+ * Broadcast membership list to all other sites in the replication group.
+ *
+ * Caller must hold mutex (NOTE(review): body locks db_rep->mutex; confirm).
+ */
+int
+__repmgr_bcast_member_list(env)
+	ENV *env;
+{
+	DB_REP *db_rep;
+	REPMGR_CONNECTION *conn;
+	REPMGR_SITE *site;
+	u_int8_t *buf, *v4buf;
+	size_t len, v4len;
+	int ret;
+	u_int i;
+
+	db_rep = env->rep_handle;
+	if (!SELECTOR_RUNNING(db_rep))
+		return (0);
+	buf = NULL;
+	v4buf = NULL;
+	LOCK_MUTEX(db_rep->mutex);
+	/*
+	 * Some of the other sites in the replication group might be at
+	 * an older version, so we need to be able to send the membership
+	 * list in the current or older format.
+	 */
+	if ((ret = __repmgr_marshal_member_list(env,
+	    DB_REPMGR_VERSION, &buf, &len)) != 0 ||
+	    (ret = __repmgr_marshal_member_list(env,
+	    4, &v4buf, &v4len)) != 0) {
+		UNLOCK_MUTEX(db_rep->mutex);
+		goto out;
+	}
+	UNLOCK_MUTEX(db_rep->mutex);
+
+	RPRINT(env, (env, DB_VERB_REPMGR_MISC,
+	    "Broadcast latest membership list"));
+	FOR_EACH_REMOTE_SITE_INDEX(i) {
+		site = SITE_FROM_EID(i);
+		if (site->state != SITE_CONNECTED)
+			continue;
+		if ((conn = site->ref.conn.in) != NULL &&
+		    conn->state == CONN_READY &&
+		    (ret = __repmgr_send_own_msg(env, conn, REPMGR_SHARING,
+		    (conn->version < 5 ? v4buf : buf),
+		    (conn->version < 5 ? (u_int32_t) v4len : (u_int32_t)len)))
+		    != 0 &&
+		    (ret = __repmgr_bust_connection(env, conn)) != 0)
+			goto out;
+		if ((conn = site->ref.conn.out) != NULL &&
+		    conn->state == CONN_READY &&
+		    (ret = __repmgr_send_own_msg(env, conn, REPMGR_SHARING,
+		    (conn->version < 5 ? v4buf : buf),
+		    (conn->version < 5 ? (u_int32_t)v4len : (u_int32_t)len)))
+		    != 0 &&
+		    (ret = __repmgr_bust_connection(env, conn)) != 0)
+			goto out;
+	}
+out:
+	if (buf != NULL)
+		__os_free(env, buf);
+	if (v4buf != NULL)
+		__os_free(env, v4buf);
+	return (ret);
+}
diff --git a/src/repmgr/repmgr_windows.c b/src/repmgr/repmgr_windows.c
index d9c2a03d..8cf05960 100644
--- a/src/repmgr/repmgr_windows.c
+++ b/src/repmgr/repmgr_windows.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -252,7 +252,7 @@ allocate_wait_slot(env, resultp, table)
 	 * the previous wait but before reacquiring the mutex, and this
 	 * extra signal would incorrectly cause the next wait to return
 	 * immediately.
-	 */ 
+	 */
 	(void)WaitForSingleObject(w->event, 0);
 	*resultp = i;
 	return (0);
@@ -639,31 +639,40 @@ __repmgr_select_loop(env)
 	WSAEVENT listen_event;
 	WSANETWORKEVENTS net_events;
 	struct io_info io_info;
-	int i;
+	int accept_connect, i;
 
 	db_rep = env->rep_handle;
 	io_info.connections = connections;
 	io_info.events = events;
+	accept_connect = FALSE;
 
 	if ((listen_event = WSACreateEvent()) == WSA_INVALID_EVENT) {
 		__db_err(env, net_errno, DB_STR("3590",
 		    "can't create event for listen socket"));
 		return (net_errno);
 	}
-	if (!IS_SUBORDINATE(db_rep) &&
-	    WSAEventSelect(db_rep->listen_fd, listen_event, FD_ACCEPT) ==
-	    SOCKET_ERROR) {
-		ret = net_errno;
-		__db_err(env, ret, DB_STR("3591",
-		    "can't enable event for listener"));
-		(void)WSACloseEvent(listen_event);
-		goto out;
-	}
 
 	LOCK_MUTEX(db_rep->mutex);
 	if ((ret = __repmgr_first_try_connections(env)) != 0)
 		goto unlock;
 	for (;;) {
+		/*
+		 * Enable FD_ACCEPT notification once this process is, or has
+		 * just become, the listener. NOTE(review): db_rep->mutex looks
+		 * held here, yet failure does "goto out", not "goto unlock".
+		 */
+		if (!IS_SUBORDINATE(db_rep) && !accept_connect) {
+			if (WSAEventSelect(db_rep->listen_fd, listen_event,
+			    FD_ACCEPT) == SOCKET_ERROR) {
+				ret = net_errno;
+				__db_err(env, ret, DB_STR("3700",
+				    "can't enable event for listener"));
+				(void)WSACloseEvent(listen_event);
+				goto out;
+			}
+			accept_connect = TRUE;
+		}
+
 		/* Start with the two events that we always wait for. */
 #define	SIGNALER_INDEX	0
 #define	LISTENER_INDEX	1
@@ -714,6 +723,8 @@ __repmgr_select_loop(env)
 					ret = net_errno;
 					goto unlock;
 				}
+				if (net_events.lNetworkEvents == 0)
+					continue;
 				DB_ASSERT(env,
 				    net_events.lNetworkEvents & FD_ACCEPT);
 				if ((ret = net_events.iErrorCode[FD_ACCEPT_BIT])
@@ -815,7 +826,16 @@ handle_completion(env, conn)
 	/* Check both writing and reading. */
 	if (events.lNetworkEvents & FD_CLOSE) {
 		error = events.iErrorCode[FD_CLOSE_BIT];
-		goto report;
+
+		/*
+		 * There could be data for reading when we see FD_CLOSE,
+		 * so we should try reading in this case.
+		 */
+		if (error != 0)
+			goto report;
+		else if ((ret =
+		    __repmgr_read_from_site(env, conn)) != 0)
+			goto err;
 	}
 
 	if (events.lNetworkEvents & FD_WRITE) {
@@ -823,7 +843,7 @@ handle_completion(env, conn)
 			error = events.iErrorCode[FD_WRITE_BIT];
 			goto report;
 		} else if ((ret =
-			__repmgr_write_some(env, conn)) != 0)
+		    __repmgr_write_some(env, conn)) != 0)
 			goto err;
 	}
 
@@ -832,7 +852,7 @@ handle_completion(env, conn)
 			error = events.iErrorCode[FD_READ_BIT];
 			goto report;
 		} else if ((ret =
-			__repmgr_read_from_site(env, conn)) != 0)
+		    __repmgr_read_from_site(env, conn)) != 0)
 			goto err;
 	}
 
diff --git a/src/sequence/seq_stat.c b/src/sequence/seq_stat.c
index d5b9a401..28f61174 100644
--- a/src/sequence/seq_stat.c
+++ b/src/sequence/seq_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -124,10 +124,12 @@ __seq_stat_print(seq, flags)
 	DB *dbp;
 	DB_THREAD_INFO *ip;
 	ENV *env;
+	u_int32_t orig_flags;
 	int handle_check, ret, t_ret;
 
 	dbp = seq->seq_dbp;
 	env = dbp->env;
+	ret = 0;
 
 	SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->stat_print");
 
@@ -140,11 +142,16 @@ __seq_stat_print(seq, flags)
 		goto err;
 	}
 
-	if ((ret = __seq_print_stats(seq, flags)) != 0)
-		goto err;
+	orig_flags = flags;
+	LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM);
+	if (flags == 0 || LF_ISSET(DB_STAT_ALL)) {
+		ret = __seq_print_stats(seq, orig_flags);
+		if (flags == 0 || ret != 0)
+			goto err;
+	}
 
 	if (LF_ISSET(DB_STAT_ALL) &&
-	    (ret = __seq_print_all(seq, flags)) != 0)
+	    (ret = __seq_print_all(seq, orig_flags)) != 0)
 		goto err;
 
 	/* Release replication block. */
diff --git a/src/sequence/sequence.c b/src/sequence/sequence.c
index 1c19f838..9ee31123 100644
--- a/src/sequence/sequence.c
+++ b/src/sequence/sequence.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -53,24 +53,23 @@
 		}							\
 	} while (0)
 
-static int __seq_chk_cachesize __P((ENV *, int32_t, db_seq_t, db_seq_t));
-static int __seq_close __P((DB_SEQUENCE *, u_int32_t));
+static int __seq_chk_cachesize __P((ENV *, u_int32_t, db_seq_t, db_seq_t));
 static int __seq_close_pp __P((DB_SEQUENCE *, u_int32_t));
-static int __seq_get
-	       __P((DB_SEQUENCE *, DB_TXN *, int32_t,  db_seq_t *, u_int32_t));
-static int __seq_get_cachesize __P((DB_SEQUENCE *, int32_t *));
+static int __seq_get_pp
+	       __P((DB_SEQUENCE *,
+		   DB_TXN *, u_int32_t,  db_seq_t *, u_int32_t));
+static int __seq_get_cachesize __P((DB_SEQUENCE *, u_int32_t *));
 static int __seq_get_db __P((DB_SEQUENCE *, DB **));
 static int __seq_get_flags __P((DB_SEQUENCE *, u_int32_t *));
 static int __seq_get_key __P((DB_SEQUENCE *, DBT *));
 static int __seq_get_range __P((DB_SEQUENCE *, db_seq_t *, db_seq_t *));
-static int __seq_initial_value __P((DB_SEQUENCE *, db_seq_t));
 static int __seq_open_pp __P((DB_SEQUENCE *, DB_TXN *, DBT *, u_int32_t));
 static int __seq_remove __P((DB_SEQUENCE *, DB_TXN *, u_int32_t));
-static int __seq_set_cachesize __P((DB_SEQUENCE *, int32_t));
+static int __seq_set_cachesize __P((DB_SEQUENCE *, u_int32_t));
 static int __seq_set_flags __P((DB_SEQUENCE *, u_int32_t));
 static int __seq_set_range __P((DB_SEQUENCE *, db_seq_t, db_seq_t));
 static int __seq_update
-	__P((DB_SEQUENCE *, DB_THREAD_INFO *, DB_TXN *, int32_t, u_int32_t));
+	__P((DB_SEQUENCE *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, u_int32_t));
 
 /*
  * db_sequence_create --
@@ -113,7 +112,7 @@ db_sequence_create(seqp, dbp, flags)
 
 	seq->seq_dbp = dbp;
 	seq->close = __seq_close_pp;
-	seq->get = __seq_get;
+	seq->get = __seq_get_pp;
 	seq->get_cachesize = __seq_get_cachesize;
 	seq->set_cachesize = __seq_set_cachesize;
 	seq->get_db = __seq_get_db;
@@ -134,7 +133,7 @@ db_sequence_create(seqp, dbp, flags)
 }
 
 /*
- * __seq_open --
+ * __seq_open_pp --
  *	DB_SEQUENCE->open method.
  *
  */
@@ -146,21 +145,18 @@ __seq_open_pp(seq, txn, keyp, flags)
 	u_int32_t flags;
 {
 	DB *dbp;
-	DB_SEQ_RECORD *rp;
 	DB_THREAD_INFO *ip;
 	ENV *env;
-	u_int32_t tflags;
-	int handle_check, txn_local, ret, t_ret;
+	int handle_check, ret, t_ret;
 #define	SEQ_OPEN_FLAGS	(DB_CREATE | DB_EXCL | DB_THREAD)
 
-	dbp = seq->seq_dbp;
-	env = dbp->env;
-	txn_local = 0;
-
-	STRIP_AUTO_COMMIT(flags);
 	SEQ_ILLEGAL_AFTER_OPEN(seq, "DB_SEQUENCE->open");
 
+	env = seq->seq_dbp->env;
+	dbp = seq->seq_dbp;
+
 	ENV_ENTER(env, ip);
+	STRIP_AUTO_COMMIT(flags);
 
 	/* Check for replication block. */
 	handle_check = IS_ENV_REPLICATED(env);
@@ -174,6 +170,41 @@ __seq_open_pp(seq, txn, keyp, flags)
 	    "DB_SEQUENCE->open", flags, SEQ_OPEN_FLAGS)) != 0)
 		goto err;
 
+	ret = __seq_open(seq, txn, keyp, flags);
+
+	/* Release replication block. */
+err:	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
+		ret = t_ret;
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * __seq_open --
+ *	Internal open function.
+ *
+ * PUBLIC: int __seq_open __P((DB_SEQUENCE *, DB_TXN *, DBT *, u_int32_t));
+ */
+
+int
+__seq_open(seq, txn, keyp, flags)
+	DB_SEQUENCE *seq;
+	DB_TXN *txn;
+	DBT *keyp;
+	u_int32_t flags;
+{
+	DB *dbp;
+	DB_SEQ_RECORD *rp;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	u_int32_t tflags;
+	int txn_local, ret, t_ret;
+
+	dbp = seq->seq_dbp;
+	env = dbp->env;
+	txn_local = 0;
+
 	if (keyp->size == 0) {
 		__db_errx(env, DB_STR("4001",
 		    "Zero length sequence key specified"));
@@ -229,6 +260,7 @@ __seq_open_pp(seq, txn, keyp, flags)
 	seq->seq_key.size = seq->seq_key.ulen = keyp->size;
 	seq->seq_key.flags = DB_DBT_USERMEM;
 
+	ENV_GET_THREAD_INFO(env, ip);
 retry:	if ((ret = __db_get(dbp, ip,
 	    txn, &seq->seq_key, &seq->seq_data, 0)) != 0) {
 		if (ret == DB_BUFFER_SMALL &&
@@ -369,11 +401,6 @@ err:	if (txn_local &&
 		__os_free(env, seq->seq_key.data);
 		seq->seq_key.data = NULL;
 	}
-	/* Release replication block. */
-	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
-		ret = t_ret;
-
-	ENV_LEAVE(env, ip);
 	__dbt_userfree(env, keyp, NULL, NULL);
 	return (ret);
 }
@@ -386,10 +413,8 @@ err:	if (txn_local &&
 static int
 __seq_get_cachesize(seq, cachesize)
 	DB_SEQUENCE *seq;
-	int32_t *cachesize;
+	u_int32_t *cachesize;
 {
-	SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_cachesize");
-
 	*cachesize = seq->seq_cache_size;
 	return (0);
 }
@@ -402,25 +427,9 @@ __seq_get_cachesize(seq, cachesize)
 static int
 __seq_set_cachesize(seq, cachesize)
 	DB_SEQUENCE *seq;
-	int32_t cachesize;
+	u_int32_t cachesize;
 {
-	ENV *env;
-	int ret;
-
-	env = seq->seq_dbp->env;
-
-	if (cachesize < 0) {
-		__db_errx(env, DB_STR("4007",
-		    "Cache size must be >= 0"));
-		return (EINVAL);
-	}
-
-	/*
-	 * It's an error to specify a cache larger than the range of sequences.
-	 */
-	if (SEQ_IS_OPEN(seq) && (ret = __seq_chk_cachesize(env,
-	    cachesize, seq->seq_rp->seq_max, seq->seq_rp->seq_min)) != 0)
-		return (ret);
+	SEQ_ILLEGAL_AFTER_OPEN(seq, "DB_SEQUENCE->set_cachesize");
 
 	seq->seq_cache_size = cachesize;
 	return (0);
@@ -437,8 +446,6 @@ __seq_get_flags(seq, flagsp)
 	DB_SEQUENCE *seq;
 	u_int32_t *flagsp;
 {
-	SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_flags");
-
 	*flagsp = F_ISSET(seq->seq_rp, SEQ_SET_FLAGS);
 	return (0);
 }
@@ -480,8 +487,10 @@ __seq_set_flags(seq, flags)
  * __seq_initial_value --
  *	DB_SEQUENCE->initial_value.
  *
+ * PUBLIC: int __seq_initial_value  __P((DB_SEQUENCE *, db_seq_t));
+ *
  */
-static int
+int
 __seq_initial_value(seq, value)
 	DB_SEQUENCE *seq;
 	db_seq_t value;
@@ -515,8 +524,6 @@ __seq_get_range(seq, minp, maxp)
 	DB_SEQUENCE *seq;
 	db_seq_t *minp, *maxp;
 {
-	SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_range");
-
 	*minp = seq->seq_rp->seq_min;
 	*maxp = seq->seq_rp->seq_max;
 	return (0);
@@ -557,14 +564,13 @@ __seq_update(seq, ip, txn, delta, flags)
 	DB_SEQUENCE *seq;
 	DB_THREAD_INFO *ip;
 	DB_TXN *txn;
-	int32_t delta;
-	u_int32_t flags;
+	u_int32_t delta, flags;
 {
 	DB *dbp;
 	DBT *data, ldata;
 	DB_SEQ_RECORD *rp;
 	ENV *env;
-	int32_t adjust;
+	db_seq_t adjust;
 	int ret, txn_local, need_mutex;
 
 	dbp = seq->seq_dbp;
@@ -721,29 +727,36 @@ err:	if (need_mutex) {
 	    env, txn, LF_ISSET(DB_TXN_NOSYNC), ret) : ret);
 }
 
-static int
+/*
+ * __seq_get --
+ *	Internal get function for sequence.
+ *
+ * PUBLIC: int __seq_get
+ * PUBLIC:  __P((DB_SEQUENCE *, DB_TXN *, u_int32_t,  db_seq_t *, u_int32_t));
+ */
+int
 __seq_get(seq, txn, delta, retp, flags)
 	DB_SEQUENCE *seq;
 	DB_TXN *txn;
-	int32_t delta;
+	u_int32_t delta, flags;
 	db_seq_t *retp;
-	u_int32_t flags;
 {
 	DB *dbp;
 	DB_SEQ_RECORD *rp;
 	DB_THREAD_INFO *ip;
 	ENV *env;
-	int handle_check, ret, t_ret;
+	int handle_check, ret;
 
 	dbp = seq->seq_dbp;
 	env = dbp->env;
 	rp = seq->seq_rp;
 	ret = 0;
+	ENV_GET_THREAD_INFO(env, ip);
 
 	STRIP_AUTO_COMMIT(flags);
 	SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get");
 
-	if (delta < 0 || (delta == 0 && !LF_ISSET(DB_CURRENT))) {
+	if (delta == 0 && !LF_ISSET(DB_CURRENT)) {
 		__db_errx(env, "Sequence delta must be greater than 0");
 		return (EINVAL);
 	}
@@ -754,16 +767,9 @@ __seq_get(seq, txn, delta, retp, flags)
 		return (EINVAL);
 	}
 
-	ENV_ENTER(env, ip);
-
-	/* Check for replication block. */
-	handle_check = IS_ENV_REPLICATED(env);
-	if (handle_check &&
-	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0)
-		return (ret);
-
 	MUTEX_LOCK(env, seq->mtx_seq);
 
+	handle_check = IS_ENV_REPLICATED(env);
 	if (handle_check && IS_REP_CLIENT(env) &&
 	    !F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
 		ret = __db_rdonly(env, "DB_SEQUENCE->get");
@@ -799,6 +805,31 @@ __seq_get(seq, txn, delta, retp, flags)
 	}
 
 err:	MUTEX_UNLOCK(env, seq->mtx_seq);
+	return (ret);
+}
+
+static int
+__seq_get_pp(seq, txn, delta, retp, flags)
+	DB_SEQUENCE *seq;
+	DB_TXN *txn;
+	u_int32_t delta, flags;
+	db_seq_t *retp;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int handle_check, ret, t_ret;
+
+	env = seq->seq_dbp->env;
+
+	ENV_ENTER(env, ip);
+
+	/* Check for replication block. */
+	handle_check = IS_ENV_REPLICATED(env);
+	if (handle_check &&
+	    (ret = __db_rep_enter(seq->seq_dbp, 1, 0, IS_REAL_TXN(txn))) != 0)
+		return (ret);
+
+	ret = __seq_get(seq, txn, delta, retp, flags);
 
 	/* Release replication block. */
 	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
@@ -868,8 +899,9 @@ __seq_close_pp(seq, flags)
  * __seq_close --
  *	Close a sequence
  *
+ * PUBLIC: int __seq_close __P((DB_SEQUENCE *, u_int32_t));
  */
-static int
+int
 __seq_close(seq, flags)
 	DB_SEQUENCE *seq;
 	u_int32_t flags;
@@ -916,19 +948,24 @@ __seq_remove(seq, txn, flags)
 
 	dbp = seq->seq_dbp;
 	env = dbp->env;
+	handle_check = 0;
+	ret = 0;
 	txn_local = 0;
 
-	SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->remove");
+	if (!SEQ_IS_OPEN(seq))
+		ret = __db_mi_open(env, "DB_SEQUENCE->remove", 0);
 
 	/*
 	 * Flags can only be 0, unless the database has DB_AUTO_COMMIT enabled.
 	 * Then DB_TXN_NOSYNC is allowed.
 	 */
-	if (flags != 0 &&
+	if (ret == 0 && flags != 0 &&
 	    (flags != DB_TXN_NOSYNC || !IS_DB_AUTO_COMMIT(dbp, txn)))
-		return (__db_ferr(env, "DB_SEQUENCE->remove illegal flag", 0));
+		ret = __db_ferr(env, "DB_SEQUENCE->remove illegal flag", 0);
 
 	ENV_ENTER(env, ip);
+	if (ret != 0)
+		goto err;
 
 	/* Check for replication block. */
 	handle_check = IS_ENV_REPLICATED(env);
@@ -945,7 +982,7 @@ __seq_remove(seq, txn, flags)
 	 */
 	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
 		if ((ret = __txn_begin(env, ip, NULL, &txn, flags)) != 0)
-			return (ret);
+			goto err;
 		txn_local = 1;
 	}
 
@@ -955,13 +992,14 @@ __seq_remove(seq, txn, flags)
 
 	ret = __db_del(dbp, ip, txn, &seq->seq_key, 0);
 
+err:
 	if ((t_ret = __seq_close(seq, 0)) != 0 && ret == 0)
 		ret = t_ret;
 
 	/* Release replication block. */
 	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
 		ret = t_ret;
-err:	if (txn_local && (t_ret =
+	if (txn_local && (t_ret =
 	    __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0)
 		ret = t_ret;
 
@@ -976,7 +1014,7 @@ err:	if (txn_local && (t_ret =
 static int
 __seq_chk_cachesize(env, cachesize, max, min)
 	ENV *env;
-	int32_t cachesize;
+	u_int32_t cachesize;
 	db_seq_t max, min;
 {
 	/*
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 81225e5c..91652cb7 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
@@ -227,8 +227,15 @@ __txn_begin(env, ip, parent, txnpp, flags)
 	if (LF_ISSET(DB_TXN_FAMILY))
 		F_SET(txn, TXN_FAMILY | TXN_INFAMILY | TXN_READONLY);
 	if (LF_ISSET(DB_TXN_SNAPSHOT) || F_ISSET(dbenv, DB_ENV_TXN_SNAPSHOT) ||
-	    (parent != NULL && F_ISSET(parent, TXN_SNAPSHOT)))
-		F_SET(txn, TXN_SNAPSHOT);
+	    (parent != NULL && F_ISSET(parent, TXN_SNAPSHOT))) {
+		if (IS_REP_CLIENT(env)) {
+			__db_errx(env, DB_STR("4572",
+		"DB_TXN_SNAPSHOT may not be used on a replication client"));
+			ret = (EINVAL);
+			goto err;
+		} else
+			F_SET(txn, TXN_SNAPSHOT);
+	}
 	if (LF_ISSET(DB_IGNORE_LEASE))
 		F_SET(txn, TXN_IGNORE_LEASE);
 
@@ -581,8 +588,7 @@ __txn_continue(env, txn, td, ip, add_to_list)
 	txn->set_timeout = __txn_set_timeout;
 	txn->set_txn_lsnp = __txn_set_txn_lsnp;
 
-	/* XXX Do we need to explicitly set a SYNC flag here? */
-	txn->flags = TXN_MALLOC |
+	txn->flags = TXN_MALLOC | TXN_SYNC |
 	    (F_ISSET(td, TXN_DTL_NOWAIT) ? TXN_NOWAIT : 0);
 	txn->xa_thr_status = TXN_XA_THREAD_NOTA;
 
@@ -795,8 +801,9 @@ __txn_commit(txn, flags)
 				if (ret == 0) {
 					DB_LSN s_lsn;
 
-					DB_ASSERT(env, __log_current_lsn_int(
-					    env, &s_lsn, NULL, NULL) == 0);
+					if ((ret = __log_current_lsn_int(
+					    env, &s_lsn, NULL, NULL)) != 0)
+						goto err;
 					DB_ASSERT(env, LOG_COMPARE(
 					    &td->visible_lsn, &s_lsn) <= 0);
 					COMPQUIET(s_lsn.file, 0);
@@ -890,17 +897,16 @@ static int
 __txn_close_cursors(txn)
 	DB_TXN *txn;
 {
-	int ret, tret;
+	int ret, t_ret;
 	DBC *dbc;
 
-	ret = tret = 0;
+	ret = t_ret = 0;
 	dbc = NULL;
 
 	if (txn == NULL)
 		return (0);
 
 	while ((dbc = TAILQ_FIRST(&txn->my_cursors)) != NULL) {
-
 		DB_ASSERT(dbc->env, txn == dbc->txn);
 
 		/*
@@ -913,21 +919,21 @@ __txn_close_cursors(txn)
 
 		/* Removed from the active queue here. */
 		if (F_ISSET(dbc, DBC_ACTIVE))
-			ret = __dbc_close(dbc);
+			t_ret = __dbc_close(dbc);
 
 		dbc->txn = NULL;
 
 		/* We have to close all cursors anyway, so continue on error. */
-		if (ret != 0) {
-			__db_err(dbc->env, ret, "__dbc_close");
-			if (tret == 0)
-				tret = ret;
+		if (t_ret != 0) {
+			__db_err(dbc->env, t_ret, "__dbc_close");
+			if (ret == 0)
+				ret = t_ret;
 		}
 	}
 	txn->my_cursors.tqh_first = NULL;
 	txn->my_cursors.tqh_last = NULL;
 
-	return (tret);/* Return the first error if any. */
+	return (ret);	/* Return the first error, if any. */
 }
 
 /*
@@ -1050,7 +1056,7 @@ __txn_abort(txn)
 	 * it, however make sure that it is aborted when the last process
 	 * tries to abort it.
 	 */
-	if (txn->xa_thr_status != TXN_XA_THREAD_NOTA &&  td->xa_ref > 1) {
+	if (txn->xa_thr_status != TXN_XA_THREAD_NOTA && td->xa_ref > 1) {
 		td->status = TXN_NEED_ABORT;
 		return (0);
 	}
@@ -2165,5 +2171,5 @@ __txn_applied(env, ip, commit_info, timeout)
 	if (renv->envid == commit_info->envid &&
 	    LOG_COMPARE(&commit_info->lsn, &lsn) <= 0)
 		return (0);
-	return (DB_NOTFOUND);
+	return (USR_ERR(env, DB_NOTFOUND));
 }
diff --git a/src/txn/txn.src b/src/txn/txn.src
index 7e82dc82..d9af5318 100644
--- a/src/txn/txn.src
+++ b/src/txn/txn.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/txn/txn_chkpt.c b/src/txn/txn_chkpt.c
index 73715b10..a909767f 100644
--- a/src/txn/txn_chkpt.c
+++ b/src/txn/txn_chkpt.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1995, 1996
@@ -377,7 +377,7 @@ __txn_getckp(env, lsnp)
 	TXN_SYSTEM_UNLOCK(env);
 
 	if (IS_ZERO_LSN(lsn))
-		return (DB_NOTFOUND);
+		return (USR_ERR(env, DB_NOTFOUND));
 
 	*lsnp = lsn;
 	return (0);
diff --git a/src/txn/txn_failchk.c b/src/txn/txn_failchk.c
index b2007ad6..94f22ec2 100644
--- a/src/txn/txn_failchk.c
+++ b/src/txn/txn_failchk.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -57,7 +57,7 @@ retry:	TXN_SYSTEM_LOCK(env);
 
 		if (F_ISSET(td, TXN_DTL_INMEMORY)) {
 			TXN_SYSTEM_UNLOCK(env);
-			return (__db_failed(env, DB_STR("4501",
+			return (__db_failed(env, DB_STR("4573",
 			    "Transaction has in memory logs"),
 			     td->pid, td->tid));
 		}
diff --git a/src/txn/txn_method.c b/src/txn/txn_method.c
index 629eac04..357e78c6 100644
--- a/src/txn/txn_method.c
+++ b/src/txn/txn_method.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/txn/txn_rec.c b/src/txn/txn_rec.c
index b39d56d1..708af98a 100644
--- a/src/txn/txn_rec.c
+++ b/src/txn/txn_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  */
 /*
  * Copyright (c) 1996
@@ -210,11 +210,12 @@ __txn_prepare_recover(env, dbtp, lsnp, op, info)
 		 */
 		else if ((ret = __db_txnlist_remove(env,
 		    info, argp->txnp->txnid)) != 0) {
-txn_err:		__db_errx(env,
+txn_err:
+			ret = USR_ERR(env, DB_NOTFOUND);
+			__db_errx(env,
 			    DB_STR_A("4515",
 			    "transaction not in list %lx", "%lx"),
 			    (u_long)argp->txnp->txnid);
-			ret = DB_NOTFOUND;
 		} else if (IS_ZERO_LSN(headp->trunc_lsn) ||
 		    LOG_COMPARE(&headp->trunc_lsn, lsnp) >= 0) {
 			if ((ret = __db_txnlist_add(env,
diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c
index 67f24439..915a289f 100644
--- a/src/txn/txn_recover.c
+++ b/src/txn/txn_recover.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -305,8 +305,8 @@ __txn_openfiles(env, ip, min, force)
 
 	if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0)
 		goto err;
-	ret = __env_openfiles(
-	    env, logc, txninfo, &data, &open_lsn, NULL, (double)0, 0);
+	ret = __env_openfiles(env,
+	    logc, txninfo, &data, &open_lsn, NULL, (double)0, 0);
 	if (txninfo != NULL)
 		__db_txnlist_end(env, txninfo);
 
diff --git a/src/txn/txn_region.c b/src/txn/txn_region.c
index 6f43d45f..7fef66e6 100644
--- a/src/txn/txn_region.c
+++ b/src/txn/txn_region.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -13,6 +13,7 @@
 #include "dbinc/txn.h"
 
 static int __txn_init __P((ENV *, DB_TXNMGR *));
+static int lsn_hi_to_low __P((const void *, const void *));
 
 /*
  * __txn_open --
@@ -57,12 +58,30 @@ __txn_open(env)
 	env->tx_handle = mgr;
 	return (0);
 
-err:	env->tx_handle = NULL;
-	if (mgr->reginfo.addr != NULL)
-		(void)__env_region_detach(env, &mgr->reginfo, 0);
+err:	(void)__mutex_free(env, &mgr->mutex);
+	(void)__txn_region_detach(env, mgr);
 
-	(void)__mutex_free(env, &mgr->mutex);
-	__os_free(env, mgr);
+	return (ret);
+}
+
+/*
+ * __txn_region_detach --
+ *	Detach and free mgr, clearing env->tx_handle; no-op for NULL mgr.
+ * PUBLIC: int __txn_region_detach __P((ENV *, DB_TXNMGR *));
+ */
+int
+__txn_region_detach(env, mgr)
+	ENV *env;
+	DB_TXNMGR *mgr;
+{
+	int ret;
+
+	ret = 0;
+	if (mgr != NULL) {
+		ret = __env_region_detach(env, &mgr->reginfo, 0);
+		__os_free(env, mgr);
+		env->tx_handle = NULL;
+	}
 	return (ret);
 }
 
@@ -409,39 +428,101 @@ __txn_id_set(env, cur_txnid, max_txnid)
 }
 
 /*
- * __txn_oldest_reader --
- *	 Find the oldest "read LSN" of any active transaction'
- *	 MVCC changes older than this can safely be discarded from the cache.
+ * lsn_hi_to_low --
+ *	Compare lsns, sorting them from high to low. This is the opposite of
+ *	__rep_lsn_cmp.
+ */
+static int
+lsn_hi_to_low(lsn1, lsn2)
+	const void *lsn1, *lsn2;
+{
+	return (LOG_COMPARE((DB_LSN *)lsn2, (DB_LSN *)lsn1));
+}
+
+/*
+ * __txn_get_readers --
+ *	Find the read LSN of all active transactions.
+ *	MVCC versions older than the oldest active transaction can safely be
+ *	discarded from the cache. MVCC versions not quite so old can be
+ *	discarded if they are not visible to any active transaction.
  *
- * PUBLIC: int __txn_oldest_reader __P((ENV *, DB_LSN *));
+ * Returns:
+ *	An error code, or 0.
+ *	If 0 was returned, *readers is either NULL (no transaction manager) or
+ *	an __os_malloc()'d array holding the current LSN plus the read_lsns of
+ *	active txns, sorted newest (largest) to oldest; *ntxnsp is its length.
+ *	The last lsn is that of the oldest active mvcc-supporting transaction.
+ *	The caller must __os_free() *readers whenever it is non-NULL.
+ *
+ * PUBLIC: int __txn_get_readers __P((ENV *, DB_LSN **, int *));
  */
+#define	TXN_READERS_SIZE	64 /* Initial number of LSNs to allocate. */
 int
-__txn_oldest_reader(env, lsnp)
+__txn_get_readers(env, readers, ntxnsp)
 	ENV *env;
-	DB_LSN *lsnp;
+	DB_LSN **readers;
+	int *ntxnsp;
 {
-	DB_LSN old_lsn;
+	DB_LSN current, *lsns;
 	DB_TXNMGR *mgr;
 	DB_TXNREGION *region;
 	TXN_DETAIL *td;
-	int ret;
+	int cmp, is_sorted, ret;
+	unsigned count, txnmax;
+
+	*ntxnsp = 0;
+	*readers = NULL;
 
 	if ((mgr = env->tx_handle) == NULL)
 		return (0);
 	region = mgr->reginfo.primary;
+	lsns = NULL;
+
+	if ((ret = __log_current_lsn_int(env, &current, NULL, NULL)) != 0)
+		return (ret);
 
-	if ((ret = __log_current_lsn_int(env, &old_lsn, NULL, NULL)) != 0)
+	txnmax = TXN_READERS_SIZE;
+	if ((ret = __os_malloc(env, txnmax * sizeof(lsns[0]), &lsns)) != 0)
 		return (ret);
 
 	TXN_SYSTEM_LOCK(env);
-	SH_TAILQ_FOREACH(td, &region->active_txn, links, __txn_detail)
-		if (LOG_COMPARE(&td->read_lsn, &old_lsn) < 0)
-			old_lsn = td->read_lsn;
+	/* The array always has at least the current lsn. */
+	lsns[0] = current;
+	count = 1;
+	is_sorted = TRUE;
 
-	*lsnp = old_lsn;
+	/*
+	 * Build up our array in most-recent (largest) to first-started (oldest)
+	 * order. Delete adjacent dups. Detect when the txns need to be sorted.
+	 */
+	SH_TAILQ_FOREACH(td, &region->active_txn, links, __txn_detail) {
+		if (IS_MAX_LSN(td->read_lsn) ||
+		    (cmp = LOG_COMPARE(&td->read_lsn, &lsns[count - 1])) == 0)
+			continue;
+		if (cmp > 0)
+			is_sorted = FALSE;
+		if (count >= txnmax) {
+			txnmax += txnmax;
+			if ((ret = __os_realloc(env,
+			    txnmax * sizeof(lsns[0]), &lsns)) != 0)
+				goto err;
+		}
+		lsns[count] = td->read_lsn;
+		count++;
+	}
+
+err:
 	TXN_SYSTEM_UNLOCK(env);
 
-	return (0);
+	if (ret != 0)
+		__os_free(env, lsns);
+	else {
+		if (!is_sorted)
+			qsort(lsns, count, sizeof(lsns[0]), lsn_hi_to_low);
+		*ntxnsp = (int)count;
+		*readers = lsns;
+	}
+	return (ret);
 }
 
 /*
diff --git a/src/txn/txn_stat.c b/src/txn/txn_stat.c
index 62fe622d..231ac3c5 100644
--- a/src/txn/txn_stat.c
+++ b/src/txn/txn_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
diff --git a/src/txn/txn_util.c b/src/txn/txn_util.c
index 0ecd7f6c..9f3b8cf6 100644
--- a/src/txn/txn_util.c
+++ b/src/txn/txn_util.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -9,6 +9,7 @@
 #include "db_config.h"
 
 #include "db_int.h"
+#include "dbinc/blob.h"
 #include "dbinc/db_page.h"
 #include "dbinc/lock.h"
 #include "dbinc/mp.h"
@@ -209,7 +210,7 @@ __txn_remlock(env, txn, lock, locker)
 
 	for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) {
 		next_e = TAILQ_NEXT(e, links);
-		if ((e->op != TXN_TRADE && e->op != TXN_TRADED && 
+		if ((e->op != TXN_TRADE && e->op != TXN_TRADED &&
 		    e->op != TXN_XTRADE) ||
 		    (e->u.t.lock.off != lock->off && e->u.t.locker != locker))
 			continue;
@@ -280,13 +281,21 @@ __txn_doevents(env, txn, opcode, preprocess)
 		    e != NULL; e = enext) {
 			enext = TAILQ_NEXT(e, links);
 			/*
-			 * Move all exclusive handle locks and 
+			 * Move all exclusive handle locks and
 			 * read handle locks to the handle locker.
 			 */
 			if (!(opcode == TXN_COMMIT && e->op == TXN_XTRADE) &&
-			    (e->op != TXN_TRADE || 
-			    IS_WRITELOCK(e->u.t.lock.mode)))
+			    (e->op != TXN_TRADE ||
+			    IS_WRITELOCK(e->u.t.lock.mode))) {
+				if (opcode == TXN_PREPARE &&
+				    e->op == TXN_REMOVE) {
+					__db_errx(env, DB_STR_A("4501",
+"TXN->prepare is not allowed because this transaction removes \"%s\"", "%s"),
+					    e->u.r.name);
+					return (EINVAL);
+				}
 				continue;
+			}
 			DO_TRADE;
 			if (txn->parent != NULL) {
 				TAILQ_REMOVE(&txn->events, e, links);
@@ -321,17 +330,26 @@ __txn_doevents(env, txn, opcode, preprocess)
 				ret = t_ret;
 			break;
 		case TXN_REMOVE:
-			if (txn->parent != NULL)
+			if (txn->parent != NULL) {
 				TAILQ_INSERT_TAIL(
 				    &txn->parent->events, e, links);
-			else if (e->u.r.fileid != NULL) {
+				continue;
+			} else if (e->u.r.fileid != NULL) {
 				if ((t_ret = __memp_nameop(env,
 				    e->u.r.fileid, NULL, e->u.r.name,
 				    NULL, e->u.r.inmem)) != 0 && ret == 0)
 					ret = t_ret;
-			} else if ((t_ret =
-			    __os_unlink(env, e->u.r.name, 0)) != 0 && ret == 0)
-				ret = t_ret;
+			} else if ((t_ret = __os_unlink(
+			    env, e->u.r.name, 0)) != 0 && ret == 0) {
+				/*
+				 * It is possible for blob files to be deleted
+				 * multiple times when truncating a database,
+				 * so ignore ENOENT errors with blob files.
+				 */
+				if (t_ret != ENOENT || strstr(
+				    e->u.r.name, BLOB_FILE_PREFIX) == NULL)
+					ret = t_ret;
+			}
 			break;
 		case TXN_TRADE:
 		case TXN_XTRADE:
@@ -371,8 +389,6 @@ dofree:
 		/* Free resources here. */
 		switch (e->op) {
 		case TXN_REMOVE:
-			if (txn->parent != NULL)
-				continue;
 			if (e->u.r.fileid != NULL)
 				__os_free(env, e->u.r.fileid);
 			__os_free(env, e->u.r.name);
@@ -548,9 +564,8 @@ __txn_reset_fe_watermarks(txn)
 {
 	DB *db;
 
-	if (txn->parent) {
+	if (txn->parent)
 		DB_ASSERT(txn->mgrp->env, TAILQ_FIRST(&txn->femfs) == NULL);
-	}
 
 	while ((db = TAILQ_FIRST(&txn->femfs)))
 		__clear_fe_watermark(txn, db);
diff --git a/src/xa/xa.c b/src/xa/xa.c
index ee75e792..5ce7842f 100644
--- a/src/xa/xa.c
+++ b/src/xa/xa.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */
@@ -233,8 +233,8 @@ __xa_put_txn(env, txnp)
 	SH_TAILQ_REMOVE(&ip->dbth_xatxn, txnp, xa_links, __db_txn);
 	TAILQ_REMOVE(&txnp->mgrp->txn_chain, txnp, links);
 	td = txnp->td;
-	DB_ASSERT(env, td->xa_ref > 0);
-	td->xa_ref--;
+	if (td->xa_ref > 0)
+		td->xa_ref--;
 	__os_free(env, txnp);
 	ip->dbth_xa_status = TXN_XA_THREAD_UNASSOCIATED;
 }
@@ -852,9 +852,9 @@ __db_xa_commit(xid, rmid, arg_flags)
 		return (ret);
 
 	/*
-	 * Because this transaction is currently associated, commit will not free
-	 * the transaction structure, which is good, because we need to do that
-	 * in xa_put_txn below.
+	 * Because this transaction is currently associated, commit will
+	 * not free the transaction structure, which is good, because we
+	 * need to do that in xa_put_txn below.
 	 */
 	if ((ret = txnp->commit(txnp, 0)) != 0) {
 		dbenv->err(dbenv, ret, DB_STR("4563",
diff --git a/src/xa/xa_map.c b/src/xa/xa_map.c
index 4dcf4d75..9fd50185 100644
--- a/src/xa/xa_map.c
+++ b/src/xa/xa_map.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
  *
  * $Id$
  */