author | unknown <tim@threads.polyesthetic.msg> | 2001-03-04 19:42:05 -0500 |
---|---|---|
committer | unknown <tim@threads.polyesthetic.msg> | 2001-03-04 19:42:05 -0500 |
commit | 07dc15a5b0fafaf0a0bcde2768b34aad2f3825fa (patch) | |
tree | 9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/hash | |
parent | 542e1c18dc5bf80665df55ffa04a48d986945259 (diff) | |
download | mariadb-git-07dc15a5b0fafaf0a0bcde2768b34aad2f3825fa.tar.gz | |
Import changeset
Diffstat (limited to 'bdb/hash')
-rw-r--r-- | bdb/hash/hash.c | 2096 |
-rw-r--r-- | bdb/hash/hash.src | 361 |
-rw-r--r-- | bdb/hash/hash_auto.c | 2023 |
-rw-r--r-- | bdb/hash/hash_conv.c | 112 |
-rw-r--r-- | bdb/hash/hash_dup.c | 805 |
-rw-r--r-- | bdb/hash/hash_func.c | 242 |
-rw-r--r-- | bdb/hash/hash_meta.c | 121 |
-rw-r--r-- | bdb/hash/hash_method.c | 126 |
-rw-r--r-- | bdb/hash/hash_page.c | 1655 |
-rw-r--r-- | bdb/hash/hash_rec.c | 1078 |
-rw-r--r-- | bdb/hash/hash_reclaim.c | 68 |
-rw-r--r-- | bdb/hash/hash_stat.c | 329 |
-rw-r--r-- | bdb/hash/hash_upgrade.c | 271 |
-rw-r--r-- | bdb/hash/hash_verify.c | 1051 |
14 files changed, 10338 insertions, 0 deletions
diff --git a/bdb/hash/hash.c b/bdb/hash/hash.c new file mode 100644 index 00000000000..e96fd4898f0 --- /dev/null +++ b/bdb/hash/hash.c @@ -0,0 +1,2096 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash.c,v 11.94 2001/01/03 16:42:26 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_am.h" +#include "db_ext.h" +#include "db_shash.h" +#include "db_swap.h" +#include "hash.h" +#include "btree.h" +#include "log.h" +#include "lock.h" +#include "txn.h" + +static int __ham_c_close __P((DBC *, db_pgno_t, int *)); +static int __ham_c_del __P((DBC *)); +static int __ham_c_destroy __P((DBC *)); +static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __ham_c_writelock __P((DBC *)); +static int __ham_del_dups __P((DBC *, DBT *)); +static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +static int __ham_dup_return __P((DBC *, DBT *, u_int32_t)); +static int __ham_expand_table __P((DBC *)); +static int __ham_init_htab __P((DBC *, + const char *, db_pgno_t, u_int32_t, u_int32_t)); +static int __ham_lookup __P((DBC *, + const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); +static int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); + +/* + * __ham_metachk -- + * + * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); + */ +int +__ham_metachk(dbp, name, hashm) + DB *dbp; + const char *name; + HMETA *hashm; +{ + DB_ENV *dbenv; + u_int32_t vers; + int ret; + + dbenv = dbp->dbenv; + + /* + * At this point, all we know is that the magic number is for a Hash. + * Check the version, the database may be out of date. + */ + vers = hashm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 4: + case 5: + case 6: + __db_err(dbenv, + "%s: hash version %lu requires a version upgrade", + name, (u_long)vers); + return (DB_OLD_VERSION); + case 7: + break; + default: + __db_err(dbenv, + "%s: unsupported hash version: %lu", name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0) + return (ret); + + /* Check the type. */ + if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) + return (EINVAL); + dbp->type = DB_HASH; + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + /* + * Check application info against metadata info, and set info, flags, + * and type based on metadata info. + */ + if ((ret = __db_fchk(dbenv, + "DB->open", hashm->dbmeta.flags, + DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) + return (ret); + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + else + if (F_ISSET(dbp, DB_AM_DUP)) { + __db_err(dbenv, + "%s: DB_DUP specified to open method but not set in database", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + else + if (F_ISSET(dbp, DB_AM_SUBDB)) { + __db_err(dbenv, + "%s: multiple databases specified but not supported in file", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { + if (dbp->dup_compare == NULL) + dbp->dup_compare = __bam_defcmp; + } else + if (dbp->dup_compare != NULL) { + __db_err(dbenv, + "%s: duplicate sort function specified but not set in database", + name); + return (EINVAL); + } + + /* Set the page size. */ + dbp->pgsize = hashm->dbmeta.pagesize; + + /* Copy the file's ID. 
*/ + memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); + + return (0); +} + +/* + * __ham_open -- + * + * PUBLIC: int __ham_open __P((DB *, const char *, db_pgno_t, u_int32_t)); + */ +int +__ham_open(dbp, name, base_pgno, flags) + DB *dbp; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + DB_ENV *dbenv; + DBC *dbc; + HASH_CURSOR *hcp; + HASH *hashp; + int need_sync, ret, t_ret; + + dbc = NULL; + dbenv = dbp->dbenv; + need_sync = 0; + + /* Initialize the remaining fields/methods of the DB. */ + dbp->del = __ham_delete; + dbp->stat = __ham_stat; + + /* + * Get a cursor. If DB_CREATE is specified, we may be creating + * pages, and to do that safely in CDB we need a write cursor. + * In STD_LOCKING mode, we'll synchronize using the meta page + * lock instead. + */ + if ((ret = dbp->cursor(dbp, + dbp->open_txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? + DB_WRITECURSOR : 0)) != 0) + return (ret); + + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + hashp->meta_pgno = base_pgno; + if ((ret = __ham_get_meta(dbc)) != 0) + goto err1; + + /* + * If this is a new file, initialize it, and put it back dirty. + * + * Initialize the hdr structure. + */ + if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { + /* File exists, verify the data in the header. */ + if (hashp->h_hash == NULL) + hashp->h_hash = hcp->hdr->dbmeta.version < 5 + ? __ham_func4 : __ham_func5; + if (!F_ISSET(dbp, DB_RDONLY) && + hashp->h_hash(dbp, + CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) { + __db_err(dbp->dbenv, + "hash: incompatible hash function"); + ret = EINVAL; + goto err2; + } + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) + F_SET(dbp, DB_AM_DUPSORT); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + } else if (!IS_RECOVERING(dbenv)) { + /* + * File does not exist, we must initialize the header. If + * locking is enabled that means getting a write lock first. + * During recovery the meta page will be in the log. + */ + dbc->lock.pgno = base_pgno; + + if (STD_LOCKING(dbc) && + ((ret = lock_put(dbenv, &hcp->hlock)) != 0 || + (ret = lock_get(dbenv, dbc->locker, + DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, + &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) + goto err2; + else if (CDB_LOCKING(dbp->dbenv)) { + DB_ASSERT(LF_ISSET(DB_CREATE)); + if ((ret = lock_get(dbenv, dbc->locker, + DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, + &dbc->mylock)) != 0) + goto err2; + } + if ((ret = __ham_init_htab(dbc, name, + base_pgno, hashp->h_nelem, hashp->h_ffactor)) != 0) + goto err2; + + need_sync = 1; + } + +err2: /* Release the meta data page */ + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; +err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + /* Sync the file so that we know that the meta data goes to disk. 
*/ + if (ret == 0 && need_sync) + ret = dbp->sync(dbp, 0); +#if CONFIG_TEST + if (ret == 0) + DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); + +DB_TEST_RECOVERY_LABEL +#endif + if (ret != 0) + (void)__ham_db_close(dbp); + + return (ret); +} + +/************************** LOCAL CREATION ROUTINES **********************/ +/* + * Returns 0 on No Error + */ +static int +__ham_init_htab(dbc, name, pgno, nelem, ffactor) + DBC *dbc; + const char *name; + db_pgno_t pgno; + u_int32_t nelem, ffactor; +{ + DB *dbp; + DB_LOCK metalock; + DB_LSN orig_lsn; + DBMETA *mmeta; + HASH_CURSOR *hcp; + HASH *hashp; + PAGE *h; + db_pgno_t mpgno; + int32_t l2, nbuckets; + int dirty_mmeta, i, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + hashp = dbp->h_internal; + mmeta = NULL; + h = NULL; + ret = 0; + dirty_mmeta = 0; + metalock.off = LOCK_INVALID; + + if (hashp->h_hash == NULL) + hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; + + if (nelem != 0 && ffactor != 0) { + nelem = (nelem - 1) / ffactor + 1; + l2 = __db_log2(nelem > 2 ? nelem : 2); + } else + l2 = 1; + nbuckets = 1 << l2; + + orig_lsn = hcp->hdr->dbmeta.lsn; + memset(hcp->hdr, 0, sizeof(HMETA)); + ZERO_LSN(hcp->hdr->dbmeta.lsn); + hcp->hdr->dbmeta.pgno = pgno; + hcp->hdr->dbmeta.magic = DB_HASHMAGIC; + hcp->hdr->dbmeta.version = DB_HASHVERSION; + hcp->hdr->dbmeta.pagesize = dbp->pgsize; + hcp->hdr->dbmeta.type = P_HASHMETA; + hcp->hdr->dbmeta.free = PGNO_INVALID; + hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1; + hcp->hdr->low_mask = (nbuckets >> 1) - 1; + hcp->hdr->ffactor = ffactor; + hcp->hdr->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); + memcpy(hcp->hdr->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(&hcp->hdr->dbmeta, DB_HASH_DUP); + if (F_ISSET(dbp, DB_AM_SUBDB)) + F_SET(&hcp->hdr->dbmeta, DB_HASH_SUBDB); + if (dbp->dup_compare != NULL) + F_SET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT); + + if ((ret = memp_fset(dbp->mpf, hcp->hdr, DB_MPOOL_DIRTY)) != 0) + goto err; + + /* + * Create the first and second buckets pages so that we have the + * page numbers for them and we can store that page number + * in the meta-data header (spares[0]). + */ + hcp->hdr->spares[0] = nbuckets; + if ((ret = memp_fget(dbp->mpf, + &hcp->hdr->spares[0], DB_MPOOL_NEW_GROUP, &h)) != 0) + goto err; + + P_INIT(h, dbp->pgsize, hcp->hdr->spares[0], PGNO_INVALID, + PGNO_INVALID, 0, P_HASH); + + /* Fill in the last fields of the meta data page. */ + hcp->hdr->spares[0] -= (nbuckets - 1); + for (i = 1; i <= l2; i++) + hcp->hdr->spares[i] = hcp->hdr->spares[0]; + for (; i < NCACHED; i++) + hcp->hdr->spares[i] = PGNO_INVALID; + + /* + * Before we are about to put any dirty pages, we need to log + * the meta-data page create. + */ + ret = __db_log_page(dbp, name, &orig_lsn, pgno, (PAGE *)hcp->hdr); + + if (dbp->open_txn != NULL) { + mmeta = (DBMETA *) hcp->hdr; + if (F_ISSET(dbp, DB_AM_SUBDB)) { + + /* + * If this is a subdatabase, then we need to + * get the LSN off the master meta data page + * because that's where free pages are linked + * and during recovery we need to access + * that page and roll it backward/forward + * correctly with respect to LSN. 
+ */ + mpgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = memp_fget(dbp->mpf, + &mpgno, 0, (PAGE **)&mmeta)) != 0) + goto err; + } + if ((t_ret = __ham_groupalloc_log(dbp->dbenv, + dbp->open_txn, &LSN(mmeta), 0, dbp->log_fileid, + &LSN(mmeta), hcp->hdr->spares[0], + hcp->hdr->max_bucket + 1, mmeta->free)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0) { + /* need to update real LSN for buffer manager */ + dirty_mmeta = 1; + } + + } + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name); + +DB_TEST_RECOVERY_LABEL +err: if (h != NULL && + (t_ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0 && ret == 0) + ret = t_ret; + + if (F_ISSET(dbp, DB_AM_SUBDB) && mmeta != NULL) + if ((t_ret = memp_fput(dbp->mpf, mmeta, + dirty_mmeta ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) + ret = t_ret; + if (metalock.off != LOCK_INVALID) + (void)__TLPUT(dbc, metalock); + + return (ret); +} + +static int +__ham_delete(dbp, txn, key, flags) + DB *dbp; + DB_TXN *txn; + DBT *key; + u_int32_t flags; +{ + DBC *dbc; + HASH_CURSOR *hcp; + db_pgno_t pgno; + int ret, t_ret; + + /* + * This is the only access method routine called directly from + * the dbp, so we have to do error checking. + */ + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); + DB_CHECK_TXN(dbp, txn); + + if ((ret = + __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) + return (ret); + + if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags); + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + + pgno = PGNO_INVALID; + if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE, &pgno)) == 0) { + if (F_ISSET(hcp, H_OK)) { + if (pgno == PGNO_INVALID) + ret = __ham_del_pair(dbc, 1); + else { + /* When we close the cursor in __ham_del_dups, + * that will make the off-page dup tree go + * go away as well as our current entry. When + * it updates cursors, ours should get marked + * as H_DELETED. + */ + ret = __ham_del_dups(dbc, key); + } + } else + ret = DB_NOTFOUND; + } + + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +out: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* ****************** CURSORS ********************************** */ +/* + * __ham_c_init -- + * Initialize the hash-specific portion of a cursor. + * + * PUBLIC: int __ham_c_init __P((DBC *)); + */ +int +__ham_c_init(dbc) + DBC *dbc; +{ + DB_ENV *dbenv; + HASH_CURSOR *new_curs; + int ret; + + dbenv = dbc->dbp->dbenv; + if ((ret = __os_calloc(dbenv, + 1, sizeof(struct cursor_t), &new_curs)) != 0) + return (ret); + if ((ret = __os_malloc(dbenv, + dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) { + __os_free(new_curs, sizeof(*new_curs)); + return (ret); + } + + dbc->internal = (DBC_INTERNAL *) new_curs; + dbc->c_close = __db_c_close; + dbc->c_count = __db_c_count; + dbc->c_del = __db_c_del; + dbc->c_dup = __db_c_dup; + dbc->c_get = __db_c_get; + dbc->c_put = __db_c_put; + dbc->c_am_close = __ham_c_close; + dbc->c_am_del = __ham_c_del; + dbc->c_am_destroy = __ham_c_destroy; + dbc->c_am_get = __ham_c_get; + dbc->c_am_put = __ham_c_put; + dbc->c_am_writelock = __ham_c_writelock; + + __ham_item_init(dbc); + + return (0); +} + +/* + * __ham_c_close -- + * Close down the cursor from a single use. 
+ */ +static int +__ham_c_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + HASH_CURSOR *hcp; + HKEYDATA *dp; + int doroot, gotmeta, ret, t_ret; + u_int32_t dirty; + + COMPQUIET(rmroot, 0); + dirty = 0; + doroot = gotmeta = ret = 0; + hcp = (HASH_CURSOR *) dbc->internal; + + /* Check for off page dups. */ + if (dbc->internal->opd != NULL) { + if ((ret = __ham_get_meta(dbc)) != 0) + goto done; + gotmeta = 1; + if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) + goto out; + dp = (HKEYDATA *)H_PAIRDATA(hcp->page, hcp->indx); + DB_ASSERT(HPAGE_PTYPE(dp) == H_OFFDUP); + memcpy(&root_pgno, HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); + + if ((ret = + hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0) + goto out; + if (doroot != 0) { + if ((ret = __ham_del_pair(dbc, 1)) != 0) + goto out; + dirty = DB_MPOOL_DIRTY; + } + } + +out: if (hcp->page != NULL && (t_ret = + memp_fput(dbc->dbp->mpf, hcp->page, dirty)) != 0 && ret == 0) + ret = t_ret; + if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +done: + __ham_item_init(dbc); + return (ret); +} + +/* + * __ham_c_destroy -- + * Cleanup the access method private part of a cursor. + */ +static int +__ham_c_destroy(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->split_buf != NULL) + __os_free(hcp->split_buf, dbc->dbp->pgsize); + __os_free(hcp, sizeof(HASH_CURSOR)); + + return (0); +} + +/* + * __ham_c_count -- + * Return a count of on-page duplicates. + * + * PUBLIC: int __ham_c_count __P((DBC *, db_recno_t *)); + */ +int +__ham_c_count(dbc, recnop) + DBC *dbc; + db_recno_t *recnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_indx_t len; + db_recno_t recno; + int ret, t_ret; + u_int8_t *p, *pend; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *) dbc->internal; + + recno = 0; + + if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) + return (ret); + + switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) { + case H_KEYDATA: + case H_OFFPAGE: + recno = 1; + break; + case H_DUPLICATE: + p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + pend = p + + LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + for (; p < pend; recno++) { + /* p may be odd, so copy rather than just dereffing */ + memcpy(&len, p, sizeof(db_indx_t)); + p += 2 * sizeof(db_indx_t) + len; + } + + break; + default: + ret = __db_unknown_type(dbp->dbenv, "__ham_c_count", + HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))); + goto err; + } + + *recnop = recno; + +err: if ((t_ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0 && ret == 0) + ret = t_ret; + hcp->page = NULL; + return (ret); +} + +static int +__ham_c_del(dbc) + DBC *dbc; +{ + DB *dbp; + DBT repldbt; + HASH_CURSOR *hcp; + int ret, t_ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED)) + return (DB_NOTFOUND); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + + if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0) + goto out; + + /* Off-page duplicates. */ + if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) + goto out; + + if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. 
*/ + if (hcp->dup_off == 0 && + DUP_SIZE(hcp->dup_len) == LEN_HDATA(hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx)) + ret = __ham_del_pair(dbc, 1); + else { + repldbt.flags = 0; + F_SET(&repldbt, DB_DBT_PARTIAL); + repldbt.doff = hcp->dup_off; + repldbt.dlen = DUP_SIZE(hcp->dup_len); + repldbt.size = 0; + repldbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, + hcp->indx)); + ret = __ham_replpair(dbc, &repldbt, 0); + hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + F_SET(hcp, H_DELETED); + ret = __ham_c_update(dbc, DUP_SIZE(hcp->dup_len), 0, 1); + } + + } else /* Not a duplicate */ + ret = __ham_del_pair(dbc, 1); + +out: if (ret == 0 && hcp->page != NULL && + (t_ret = memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0) + ret = t_ret; + hcp->page = NULL; + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __ham_c_dup -- + * Duplicate a hash cursor, such that the new one holds appropriate + * locks for the position of the original. + * + * PUBLIC: int __ham_c_dup __P((DBC *, DBC *)); + */ +int +__ham_c_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + HASH_CURSOR *orig, *new; + + orig = (HASH_CURSOR *)orig_dbc->internal; + new = (HASH_CURSOR *)new_dbc->internal; + + new->bucket = orig->bucket; + new->lbucket = orig->lbucket; + new->dup_off = orig->dup_off; + new->dup_len = orig->dup_len; + new->dup_tlen = orig->dup_tlen; + + if (F_ISSET(orig, H_DELETED)) + F_SET(new, H_DELETED); + if (F_ISSET(orig, H_ISDUP)) + F_SET(new, H_ISDUP); + + /* + * If the old cursor held a lock and we're not in transactions, get one + * for the new one. The reason that we don't need a new lock if we're + * in a transaction is because we already hold a lock and will continue + * to do so until commit, so there is no point in reaquiring it. We + * don't know if the old lock was a read or write lock, but it doesn't + * matter. We'll get a read lock. We know that this locker already + * holds a lock of the correct type, so if we need a write lock and + * request it, we know that we'll get it. + */ + if (orig->lock.off == LOCK_INVALID || orig_dbc->txn != NULL) + return (0); + + return (__ham_lock_bucket(new_dbc, DB_LOCK_READ)); +} + +static int +__ham_c_get(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_lockmode_t lock_type; + int get_key, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + + /* Clear OR'd in additional bits so we can check for flag equality. */ + if (F_ISSET(dbc, DBC_RMW)) + lock_type = DB_LOCK_WRITE; + else + lock_type = DB_LOCK_READ; + + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + hcp->seek_size = 0; + + ret = 0; + get_key = 1; + switch (flags) { + case DB_PREV_NODUP: + F_SET(hcp, H_NEXT_NODUP); + /* FALLTHROUGH */ + case DB_PREV: + if (IS_INITIALIZED(dbc)) { + ret = __ham_item_prev(dbc, lock_type, pgnop); + break; + } + /* FALLTHROUGH */ + case DB_LAST: + ret = __ham_item_last(dbc, lock_type, pgnop); + break; + case DB_NEXT_NODUP: + F_SET(hcp, H_NEXT_NODUP); + /* FALLTHROUGH */ + case DB_NEXT: + if (IS_INITIALIZED(dbc)) { + ret = __ham_item_next(dbc, lock_type, pgnop); + break; + } + /* FALLTHROUGH */ + case DB_FIRST: + ret = __ham_item_first(dbc, lock_type, pgnop); + break; + case DB_NEXT_DUP: + /* cgetchk has already determined that the cursor is set. 
*/ + F_SET(hcp, H_DUPONLY); + ret = __ham_item_next(dbc, lock_type, pgnop); + break; + case DB_SET: + case DB_SET_RANGE: + case DB_GET_BOTH: + ret = __ham_lookup(dbc, key, 0, lock_type, pgnop); + get_key = 0; + break; + case DB_GET_BOTHC: + F_SET(hcp, H_DUPONLY); + + ret = __ham_item_next(dbc, lock_type, pgnop); + get_key = 0; + break; + case DB_CURRENT: + /* cgetchk has already determined that the cursor is set. */ + if (F_ISSET(hcp, H_DELETED)) { + ret = DB_KEYEMPTY; + goto err; + } + + ret = __ham_item(dbc, lock_type, pgnop); + break; + } + + /* + * Must always enter this loop to do error handling and + * check for big key/data pair. + */ + for (;;) { + if (ret != 0 && ret != DB_NOTFOUND) + goto err; + else if (F_ISSET(hcp, H_OK)) { + if (*pgnop == PGNO_INVALID) + ret = __ham_dup_return (dbc, data, flags); + break; + } else if (!F_ISSET(hcp, H_NOMORE)) { + __db_err(dbp->dbenv, + "H_NOMORE returned to __ham_c_get"); + ret = EINVAL; + break; + } + + /* + * Ran out of entries in a bucket; change buckets. + */ + switch (flags) { + case DB_LAST: + case DB_PREV: + case DB_PREV_NODUP: + ret = memp_fput(dbp->mpf, hcp->page, 0); + hcp->page = NULL; + if (hcp->bucket == 0) { + ret = DB_NOTFOUND; + hcp->pgno = PGNO_INVALID; + goto err; + } + F_CLR(hcp, H_ISDUP); + hcp->bucket--; + hcp->indx = NDX_INVALID; + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (ret == 0) + ret = __ham_item_prev(dbc, + lock_type, pgnop); + break; + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_NODUP: + ret = memp_fput(dbp->mpf, hcp->page, 0); + hcp->page = NULL; + hcp->indx = NDX_INVALID; + hcp->bucket++; + F_CLR(hcp, H_ISDUP); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (hcp->bucket > hcp->hdr->max_bucket) { + ret = DB_NOTFOUND; + hcp->pgno = PGNO_INVALID; + goto err; + } + if (ret == 0) + ret = __ham_item_next(dbc, + lock_type, pgnop); + break; + case DB_GET_BOTH: + case DB_GET_BOTHC: + case DB_NEXT_DUP: + case DB_SET: + case DB_SET_RANGE: + /* Key not found. */ + ret = DB_NOTFOUND; + goto err; + case DB_CURRENT: + /* + * This should only happen if you are doing + * deletes and reading with concurrent threads + * and not doing proper locking. We return + * the same error code as we would if the + * cursor were deleted. + */ + ret = DB_KEYEMPTY; + goto err; + default: + DB_ASSERT(0); + } + } + + if (get_key == 0) + F_SET(key, DB_DBT_ISSET); + +err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + + F_CLR(hcp, H_DUPONLY); + F_CLR(hcp, H_NEXT_NODUP); + + return (ret); +} + +static int +__ham_c_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DBT tmp_val, *myval; + HASH_CURSOR *hcp; + u_int32_t nbytes; + int ret, t_ret; + + /* + * The compiler doesn't realize that we only use this when ret is + * equal to 0 and that if ret is equal to 0, that we must have set + * myval. So, we initialize it here to shut the compiler up. + */ + COMPQUIET(myval, NULL); + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED) && + flags != DB_KEYFIRST && flags != DB_KEYLAST) + return (DB_NOTFOUND); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err1; + + switch (flags) { + case DB_KEYLAST: + case DB_KEYFIRST: + case DB_NODUPDATA: + nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(key->size)) + + (ISBIG(hcp, data->size) ? 
HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(data->size)); + if ((ret = __ham_lookup(dbc, + key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) { + ret = 0; + if (hcp->seek_found_page != PGNO_INVALID && + hcp->seek_found_page != hcp->pgno) { + if ((ret = memp_fput(dbp->mpf, hcp->page, 0)) + != 0) + goto err2; + hcp->page = NULL; + hcp->pgno = hcp->seek_found_page; + hcp->indx = NDX_INVALID; + } + + if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { + /* + * A partial put, but the key does not exist + * and we are not beginning the write at 0. + * We must create a data item padded up to doff + * and then write the new bytes represented by + * val. + */ + if ((ret = __ham_init_dbt(dbp->dbenv, + &tmp_val, data->size + data->doff, + &dbc->rdata.data, &dbc->rdata.ulen)) == 0) { + memset(tmp_val.data, 0, data->doff); + memcpy((u_int8_t *)tmp_val.data + + data->doff, data->data, data->size); + myval = &tmp_val; + } + } else + myval = (DBT *)data; + + if (ret == 0) + ret = __ham_add_el(dbc, key, myval, H_KEYDATA); + goto done; + } + break; + case DB_BEFORE: + case DB_AFTER: + case DB_CURRENT: + ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop); + break; + } + + if (*pgnop == PGNO_INVALID && ret == 0) { + if (flags == DB_CURRENT || + ((flags == DB_KEYFIRST || + flags == DB_KEYLAST || flags == DB_NODUPDATA) && + !(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK)))) + ret = __ham_overwrite(dbc, data, flags); + else + ret = __ham_add_dup(dbc, data, flags, pgnop); + } + +done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { + ret = __ham_expand_table(dbc); + F_CLR(hcp, H_EXPAND); + } + + if (ret == 0 && + (t_ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0) + ret = t_ret; + +err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +err1: return (ret); +} + +/********************************* UTILITIES ************************/ + +/* + * __ham_expand_table -- + */ +static int +__ham_expand_table(dbc) + DBC *dbc; +{ + DB *dbp; + PAGE *h; + HASH_CURSOR *hcp; + db_pgno_t pgno; + u_int32_t old_bucket, new_bucket; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_dirty_meta(dbc)) != 0) + return (ret); + + /* + * If the split point is about to increase, make sure that we + * have enough extra pages. The calculation here is weird. + * We'd like to do this after we've upped max_bucket, but it's + * too late then because we've logged the meta-data split. What + * we'll do between then and now is increment max bucket and then + * see what the log of one greater than that is; here we have to + * look at the log of max + 2. VERY NASTY STUFF. + * + * It just got even nastier. With subdatabases, we have to request + * a chunk of contiguous pages, so we do that here using an + * undocumented feature of mpool (the MPOOL_NEW_GROUP flag) to + * give us a number of contiguous pages. Ouch. + */ + if (hcp->hdr->max_bucket == hcp->hdr->high_mask) { + /* + * Ask mpool to give us a set of contiguous page numbers + * large enough to contain the next doubling. + * + * Figure out how many new pages we need. This will return + * us the last page. We calculate its page number, initialize + * the page and then write it back to reserve all the pages + * in between. It is possible that the allocation of new pages + * has already been done, but the tranaction aborted. Since + * we don't undo the allocation, check for a valid pgno before + * doing the allocation. 
+ */ + pgno = hcp->hdr->max_bucket + 1; + if (hcp->hdr->spares[__db_log2(pgno) + 1] == PGNO_INVALID) + /* Allocate a group of pages. */ + ret = memp_fget(dbp->mpf, + &pgno, DB_MPOOL_NEW_GROUP, &h); + else { + /* Just read in the last page of the batch */ + pgno = hcp->hdr->spares[__db_log2(pgno) + 1] + + hcp->hdr->max_bucket + 1; + /* Move to the last page of the group. */ + pgno += hcp->hdr->max_bucket; + ret = memp_fget(dbp->mpf, + &pgno, DB_MPOOL_CREATE, &h); + } + if (ret != 0) + return (ret); + + P_INIT(h, dbp->pgsize, pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + pgno -= hcp->hdr->max_bucket; + } else { + pgno = BUCKET_TO_PAGE(hcp, hcp->hdr->max_bucket + 1); + if ((ret = + memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) + return (ret); + } + + /* Now we can log the meta-data split. */ + if (DB_LOGGING(dbc)) { + if ((ret = __ham_metagroup_log(dbp->dbenv, + dbc->txn, &h->lsn, 0, dbp->log_fileid, + hcp->hdr->max_bucket, pgno, &hcp->hdr->dbmeta.lsn, + &h->lsn)) != 0) { + (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); + return (ret); + } + + hcp->hdr->dbmeta.lsn = h->lsn; + } + + /* If we allocated some new pages, write out the last page. */ + if ((ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + return (ret); + + new_bucket = ++hcp->hdr->max_bucket; + old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask); + + /* + * If we started a new doubling, fill in the spares array with + * the starting page number negatively offset by the bucket number. + */ + if (new_bucket > hcp->hdr->high_mask) { + /* Starting a new doubling */ + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; + if (hcp->hdr->spares[__db_log2(new_bucket) + 1] == PGNO_INVALID) + hcp->hdr->spares[__db_log2(new_bucket) + 1] = + pgno - new_bucket; + } + + /* Relocate records to the new bucket */ + return (__ham_split_page(dbc, old_bucket, new_bucket)); +} + +/* + * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, int32_t)); + */ +u_int32_t +__ham_call_hash(dbc, k, len) + DBC *dbc; + u_int8_t *k; + int32_t len; +{ + DB *dbp; + u_int32_t n, bucket; + HASH_CURSOR *hcp; + HASH *hashp; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + + n = (u_int32_t)(hashp->h_hash(dbp, k, len)); + + bucket = n & hcp->hdr->high_mask; + if (bucket > hcp->hdr->max_bucket) + bucket = bucket & hcp->hdr->low_mask; + return (bucket); +} + +/* + * Check for duplicates, and call __db_ret appropriately. Release + * everything held by the cursor. + */ +static int +__ham_dup_return (dbc, val, flags) + DBC *dbc; + DBT *val; + u_int32_t flags; +{ + DB *dbp; + HASH_CURSOR *hcp; + PAGE *pp; + DBT *myval, tmp_val; + db_indx_t ndx; + db_pgno_t pgno; + u_int32_t off, tlen; + u_int8_t *hk, type; + int cmp, ret; + db_indx_t len; + + /* Check for duplicate and return the first one. */ + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + ndx = H_DATAINDEX(hcp->indx); + type = HPAGE_TYPE(hcp->page, ndx); + pp = hcp->page; + myval = val; + + /* + * There are 4 cases: + * 1. We are not in duplicate, simply return; the upper layer + * will do the right thing. + * 2. We are looking at keys and stumbled onto a duplicate. + * 3. We are in the middle of a duplicate set. (ISDUP set) + * 4. We need to check for particular data match. + */ + + /* We should never get here with off-page dups. 
*/ + DB_ASSERT(type != H_OFFDUP); + + /* Case 1 */ + if (type != H_DUPLICATE && + flags != DB_GET_BOTH && flags != DB_GET_BOTHC) + return (0); + + /* + * Here we check for the case where we just stumbled onto a + * duplicate. In this case, we do initialization and then + * let the normal duplicate code handle it. (Case 2) + */ + if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) { + F_SET(hcp, H_ISDUP); + hcp->dup_tlen = LEN_HDATA(hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx); + hk = H_PAIRDATA(hcp->page, hcp->indx); + if (flags == DB_LAST + || flags == DB_PREV || flags == DB_PREV_NODUP) { + hcp->dup_off = 0; + do { + memcpy(&len, + HKEYDATA_DATA(hk) + hcp->dup_off, + sizeof(db_indx_t)); + hcp->dup_off += DUP_SIZE(len); + } while (hcp->dup_off < hcp->dup_tlen); + hcp->dup_off -= DUP_SIZE(len); + } else { + memcpy(&len, + HKEYDATA_DATA(hk), sizeof(db_indx_t)); + hcp->dup_off = 0; + } + hcp->dup_len = len; + } + + /* + * If we are retrieving a specific key/data pair, then we + * may need to adjust the cursor before returning data. + * Case 4 + */ + if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) { + if (F_ISSET(hcp, H_ISDUP)) { + /* + * If we're doing a join, search forward from the + * current position, not the beginning of the dup set. + */ + if (flags == DB_GET_BOTHC) + F_SET(hcp, H_CONTINUE); + + __ham_dsearch(dbc, val, &off, &cmp); + + /* + * This flag is set nowhere else and is safe to + * clear unconditionally. + */ + F_CLR(hcp, H_CONTINUE); + hcp->dup_off = off; + } else { + hk = H_PAIRDATA(hcp->page, hcp->indx); + if (((HKEYDATA *)hk)->type == H_OFFPAGE) { + memcpy(&tlen, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if ((ret = __db_moff(dbp, val, + pgno, tlen, dbp->dup_compare, &cmp)) != 0) + return (ret); + } else { + /* + * We do not zero tmp_val since the comparison + * routines may only look at data and size. + */ + tmp_val.data = HKEYDATA_DATA(hk); + tmp_val.size = LEN_HDATA(hcp->page, + dbp->pgsize, hcp->indx); + cmp = dbp->dup_compare == NULL ? + __bam_defcmp(dbp, &tmp_val, val) : + dbp->dup_compare(dbp, &tmp_val, val); + } + } + + if (cmp != 0) + return (DB_NOTFOUND); + } + + /* + * Now, everything is initialized, grab a duplicate if + * necessary. + */ + if (F_ISSET(hcp, H_ISDUP)) { /* Case 3 */ + /* + * Copy the DBT in case we are retrieving into user + * memory and we need the parameters for it. If the + * user requested a partial, then we need to adjust + * the user's parameters to get the partial of the + * duplicate which is itself a partial. + */ + memcpy(&tmp_val, val, sizeof(*val)); + if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) { + /* + * Take the user's length unless it would go + * beyond the end of the duplicate. + */ + if (tmp_val.doff + hcp->dup_off > hcp->dup_len) + tmp_val.dlen = 0; + else if (tmp_val.dlen + tmp_val.doff > + hcp->dup_len) + tmp_val.dlen = + hcp->dup_len - tmp_val.doff; + + /* + * Calculate the new offset. + */ + tmp_val.doff += hcp->dup_off; + } else { + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.dlen = hcp->dup_len; + tmp_val.doff = hcp->dup_off + sizeof(db_indx_t); + } + myval = &tmp_val; + } + + /* + * Finally, if we had a duplicate, pp, ndx, and myval should be + * set appropriately. + */ + if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data, + &dbc->rdata.ulen)) != 0) + return (ret); + + /* + * In case we sent a temporary off to db_ret, set the real + * return values. 
+ */ + val->data = myval->data; + val->size = myval->size; + + F_SET(val, DB_DBT_ISSET); + + return (0); +} + +static int +__ham_overwrite(dbc, nval, flags) + DBC *dbc; + DBT *nval; + u_int32_t flags; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT *myval, tmp_val, tmp_val2; + void *newrec; + u_int8_t *hk, *p; + u_int32_t len, nondup_size; + db_indx_t newsize; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + if (F_ISSET(hcp, H_ISDUP)) { + /* + * This is an overwrite of a duplicate. We should never + * be off-page at this point. + */ + DB_ASSERT(hcp->opd == NULL); + /* On page dups */ + if (F_ISSET(nval, DB_DBT_PARTIAL)) { + /* + * We're going to have to get the current item, then + * construct the record, do any padding and do a + * replace. + */ + memset(&tmp_val, 0, sizeof(tmp_val)); + if ((ret = + __ham_dup_return (dbc, &tmp_val, DB_CURRENT)) != 0) + return (ret); + + /* Figure out new size. */ + nondup_size = tmp_val.size; + newsize = nondup_size; + + /* + * Three cases: + * 1. strictly append (may need to allocate space + * for pad bytes; really gross). + * 2. overwrite some and append. + * 3. strictly overwrite. + */ + if (nval->doff > nondup_size) + newsize += + (nval->doff - nondup_size + nval->size); + else if (nval->doff + nval->dlen > nondup_size) + newsize += nval->size - + (nondup_size - nval->doff); + else + newsize += nval->size - nval->dlen; + + /* + * Make sure that the new size doesn't put us over + * the onpage duplicate size in which case we need + * to convert to off-page duplicates. + */ + if (ISBIG(hcp, hcp->dup_tlen - nondup_size + newsize)) { + if ((ret = __ham_dup_convert(dbc)) != 0) + return (ret); + return (hcp->opd->c_am_put(hcp->opd, + NULL, nval, flags, NULL)); + } + + if ((ret = __os_malloc(dbp->dbenv, + DUP_SIZE(newsize), NULL, &newrec)) != 0) + return (ret); + memset(&tmp_val2, 0, sizeof(tmp_val2)); + F_SET(&tmp_val2, DB_DBT_PARTIAL); + + /* Construct the record. */ + p = newrec; + /* Initial size. */ + memcpy(p, &newsize, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + + /* First part of original record. */ + len = nval->doff > tmp_val.size + ? tmp_val.size : nval->doff; + memcpy(p, tmp_val.data, len); + p += len; + + if (nval->doff > tmp_val.size) { + /* Padding */ + memset(p, 0, nval->doff - tmp_val.size); + p += nval->doff - tmp_val.size; + } + + /* New bytes */ + memcpy(p, nval->data, nval->size); + p += nval->size; + + /* End of original record (if there is any) */ + if (nval->doff + nval->dlen < tmp_val.size) { + len = tmp_val.size - nval->doff - nval->dlen; + memcpy(p, (u_int8_t *)tmp_val.data + + nval->doff + nval->dlen, len); + p += len; + } + + /* Final size. */ + memcpy(p, &newsize, sizeof(db_indx_t)); + + /* + * Make sure that the caller isn't corrupting + * the sort order. 
+ */ + if (dbp->dup_compare != NULL) { + tmp_val2.data = + (u_int8_t *)newrec + sizeof(db_indx_t); + tmp_val2.size = newsize; + if (dbp->dup_compare( + dbp, &tmp_val, &tmp_val2) != 0) { + (void)__os_free(newrec, + DUP_SIZE(newsize)); + return (__db_duperr(dbp, flags)); + } + } + + tmp_val2.data = newrec; + tmp_val2.size = DUP_SIZE(newsize); + tmp_val2.doff = hcp->dup_off; + tmp_val2.dlen = DUP_SIZE(hcp->dup_len); + + ret = __ham_replpair(dbc, &tmp_val2, 0); + (void)__os_free(newrec, DUP_SIZE(newsize)); + + /* Update cursor */ + if (ret != 0) + return (ret); + + if (newsize > nondup_size) + hcp->dup_tlen += (newsize - nondup_size); + else + hcp->dup_tlen -= (nondup_size - newsize); + hcp->dup_len = DUP_SIZE(newsize); + return (0); + } else { + /* Check whether we need to convert to off page. */ + if (ISBIG(hcp, + hcp->dup_tlen - hcp->dup_len + nval->size)) { + if ((ret = __ham_dup_convert(dbc)) != 0) + return (ret); + return (hcp->opd->c_am_put(hcp->opd, + NULL, nval, flags, NULL)); + } + + /* Make sure we maintain sort order. */ + if (dbp->dup_compare != NULL) { + tmp_val2.data = + HKEYDATA_DATA(H_PAIRDATA(hcp->page, + hcp->indx)) + hcp->dup_off + + sizeof(db_indx_t); + tmp_val2.size = hcp->dup_len; + if (dbp->dup_compare(dbp, nval, &tmp_val2) != 0) + return (EINVAL); + } + /* Overwriting a complete duplicate. */ + if ((ret = + __ham_make_dup(dbp->dbenv, nval, + &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) + return (ret); + /* Now fix what we are replacing. */ + tmp_val.doff = hcp->dup_off; + tmp_val.dlen = DUP_SIZE(hcp->dup_len); + + /* Update cursor */ + if (nval->size > hcp->dup_len) + hcp->dup_tlen += (nval->size - hcp->dup_len); + else + hcp->dup_tlen -= (hcp->dup_len - nval->size); + hcp->dup_len = DUP_SIZE(nval->size); + } + myval = &tmp_val; + } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { + /* Put/overwrite */ + memcpy(&tmp_val, nval, sizeof(*nval)); + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.doff = 0; + hk = H_PAIRDATA(hcp->page, hcp->indx); + if (HPAGE_PTYPE(hk) == H_OFFPAGE) + memcpy(&tmp_val.dlen, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + else + tmp_val.dlen = LEN_HDATA(hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx); + myval = &tmp_val; + } else + /* Regular partial put */ + myval = nval; + + return (__ham_replpair(dbc, myval, 0)); +} + +/* + * Given a key and a cursor, sets the cursor to the page/ndx on which + * the key resides. If the key is found, the cursor H_OK flag is set + * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set. + * If the key is not found, the H_OK flag is not set. If the sought + * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields + * are set indicating where an add might take place. If it is 0, + * non of the cursor pointer field are valid. + */ +static int +__ham_lookup(dbc, key, sought, mode, pgnop) + DBC *dbc; + const DBT *key; + u_int32_t sought; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t pgno; + u_int32_t tlen; + int match, ret; + u_int8_t *hk, *dk; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + /* + * Set up cursor so that we're looking for space to add an item + * as we cycle through the pages looking for the key. 
+ */ + if ((ret = __ham_item_reset(dbc)) != 0) + return (ret); + hcp->seek_size = sought; + + hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + + while (1) { + *pgnop = PGNO_INVALID; + if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0) + return (ret); + + if (F_ISSET(hcp, H_NOMORE)) + break; + + hk = H_PAIRKEY(hcp->page, hcp->indx); + switch (HPAGE_PTYPE(hk)) { + case H_OFFPAGE: + memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + if (tlen == key->size) { + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if ((ret = __db_moff(dbp, + key, pgno, tlen, NULL, &match)) != 0) + return (ret); + if (match == 0) + goto found_key; + } + break; + case H_KEYDATA: + if (key->size == + LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx) && + memcmp(key->data, + HKEYDATA_DATA(hk), key->size) == 0) { + /* Found the key, check for data type. */ +found_key: F_SET(hcp, H_OK); + dk = H_PAIRDATA(hcp->page, hcp->indx); + if (HPAGE_PTYPE(dk) == H_OFFDUP) + memcpy(pgnop, HOFFDUP_PGNO(dk), + sizeof(db_pgno_t)); + return (0); + } + break; + case H_DUPLICATE: + case H_OFFDUP: + /* + * These are errors because keys are never + * duplicated, only data items are. + */ + return (__db_pgfmt(dbp, PGNO(hcp->page))); + } + } + + /* + * Item was not found. + */ + + if (sought != 0) + return (ret); + + return (ret); +} + +/* + * __ham_init_dbt -- + * Initialize a dbt using some possibly already allocated storage + * for items. + * + * PUBLIC: int __ham_init_dbt __P((DB_ENV *, + * PUBLIC: DBT *, u_int32_t, void **, u_int32_t *)); + */ +int +__ham_init_dbt(dbenv, dbt, size, bufp, sizep) + DB_ENV *dbenv; + DBT *dbt; + u_int32_t size; + void **bufp; + u_int32_t *sizep; +{ + int ret; + + memset(dbt, 0, sizeof(*dbt)); + if (*sizep < size) { + if ((ret = __os_realloc(dbenv, size, NULL, bufp)) != 0) { + *sizep = 0; + return (ret); + } + *sizep = size; + } + dbt->data = *bufp; + dbt->size = size; + return (0); +} + +/* + * Adjust the cursor after an insert or delete. The cursor passed is + * the one that was operated upon; we just need to check any of the + * others. + * + * len indicates the length of the item added/deleted + * add indicates if the item indicated by the cursor has just been + * added (add == 1) or deleted (add == 0). + * dup indicates if the addition occurred into a duplicate set. + * + * PUBLIC: int __ham_c_update + * PUBLIC: __P((DBC *, u_int32_t, int, int)); + */ +int +__ham_c_update(dbc, len, add, is_dup) + DBC *dbc; + u_int32_t len; + int add, is_dup; +{ + DB *dbp, *ldbp; + DBC *cp; + DB_ENV *dbenv; + DB_LSN lsn; + DB_TXN *my_txn; + HASH_CURSOR *hcp, *lcp; + int found, ret; + u_int32_t order; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + hcp = (HASH_CURSOR *)dbc->internal; + + /* + * Adjustment will only be logged if this is a subtransaction. + * Only subtransactions can abort and effect their parent + * transactions cursors. + */ + + my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; + found = 0; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + + /* + * Calcuate the order of this deleted record. + * This will be one grater than any cursor that is pointing + * at this record and already marked as deleted. 
+ */ + order = 0; + if (!add) { + order = 1; + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + lcp = (HASH_CURSOR *)cp->internal; + if (F_ISSET(lcp, H_DELETED) && + hcp->pgno == lcp->pgno && + hcp->indx == lcp->indx && + order <= lcp->order && + (!is_dup || hcp->dup_off == lcp->dup_off)) + order = lcp->order +1; + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + hcp->order = order; + } + + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + + lcp = (HASH_CURSOR *)cp->internal; + + if (lcp->pgno != hcp->pgno || lcp->indx == NDX_INVALID) + continue; + + if (my_txn != NULL && cp->txn != my_txn) + found = 1; + + if (!is_dup) { + if (add) { + /* + * This routine is not called to add + * non-dup records which are always put + * at the end. It is only called from + * recovery in this case and the + * cursor will be marked deleted. + * We are "undeleting" so unmark all + * cursors with the same order. + */ + if (lcp->indx == hcp->indx + && F_ISSET(lcp, H_DELETED)) { + if (lcp->order == hcp->order) + F_CLR(lcp, H_DELETED); + else if (lcp->order > + hcp->order) { + + /* + * If we've moved this cursor's + * index, split its order + * number--i.e., decrement it by + * enough so that the lowest + * cursor moved has order 1. + * cp_arg->order is the split + * point, so decrement by one + * less than that. + */ + lcp->order -= + (hcp->order - 1); + lcp->indx += 2; + } + } else if (lcp->indx >= hcp->indx) + lcp->indx += 2; + + } else { + if (lcp->indx > hcp->indx) { + lcp->indx -= 2; + if (lcp->indx == hcp->indx + && F_ISSET(lcp, H_DELETED)) + lcp->order += order; + } else if (lcp->indx == hcp->indx + && !F_ISSET(lcp, H_DELETED)) { + F_SET(lcp, H_DELETED); + lcp->order = order; + } + } + } else if (lcp->indx == hcp->indx) { + /* + * Handle duplicates. This routine is + * only called for on page dups. + * Off page dups are handled by btree/rtree + * code. + */ + if (add) { + lcp->dup_tlen += len; + if (lcp->dup_off == hcp->dup_off + && F_ISSET(hcp, H_DELETED) + && F_ISSET(lcp, H_DELETED)) { + /* Abort of a delete. 
*/ + if (lcp->order == hcp->order) + F_CLR(lcp, H_DELETED); + else if (lcp->order > + hcp->order) { + lcp->order -= + (hcp->order -1); + lcp->dup_off += len; + } + } else if (lcp->dup_off >= hcp->dup_off) + lcp->dup_off += len; + } else { + lcp->dup_tlen -= len; + if (lcp->dup_off > hcp->dup_off) { + lcp->dup_off -= len; + if (lcp->dup_off == hcp->dup_off + && F_ISSET(lcp, H_DELETED)) + lcp->order += order; + } else if (lcp->dup_off == + hcp->dup_off && + !F_ISSET(lcp, H_DELETED)) { + F_SET(lcp, H_DELETED); + lcp->order = order; + } + } + } + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (found != 0 && DB_LOGGING(dbc)) { + if ((ret = __ham_curadj_log(dbenv, + my_txn, &lsn, 0, dbp->log_fileid, hcp->pgno, + hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0) + return (ret); + } + + return (0); +} + +/* + * __ham_get_clist -- + * + * Get a list of cursors either on a particular bucket or on a particular + * page and index combination. The former is so that we can update + * cursors on a split. The latter is so we can update cursors when we + * move items off page. + * + * PUBLIC: int __ham_get_clist __P((DB *, + * PUBLIC: db_pgno_t, u_int32_t, DBC ***)); + */ +int +__ham_get_clist(dbp, bucket, indx, listp) + DB *dbp; + db_pgno_t bucket; + u_int32_t indx; + DBC ***listp; +{ + DB *ldbp; + DBC *cp; + DB_ENV *dbenv; + int nalloc, nused, ret; + + /* + * Assume that finding anything is the exception, so optimize for + * the case where there aren't any. + */ + nalloc = nused = 0; + *listp = NULL; + dbenv = dbp->dbenv; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) + if (cp->dbtype == DB_HASH && + ((indx == NDX_INVALID && + ((HASH_CURSOR *)(cp->internal))->bucket + == bucket) || (indx != NDX_INVALID && + cp->internal->pgno == bucket && + cp->internal->indx == indx))) { + if (nused >= nalloc) { + nalloc += 10; + if ((ret = __os_realloc(dbp->dbenv, + nalloc * sizeof(HASH_CURSOR *), + NULL, listp)) != 0) + return (ret); + } + (*listp)[nused++] = cp; + } + + MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (listp != NULL) { + if (nused >= nalloc) { + nalloc++; + if ((ret = __os_realloc(dbp->dbenv, + nalloc * sizeof(HASH_CURSOR *), NULL, listp)) != 0) + return (ret); + } + (*listp)[nused] = NULL; + } + return (0); +} + +static int +__ham_del_dups(orig_dbc, key) + DBC *orig_dbc; + DBT *key; +{ + DBC *dbc; + DBT data, lkey; + int ret, t_ret; + + /* Allocate a cursor. */ + if ((ret = orig_dbc->c_dup(orig_dbc, &dbc, 0)) != 0) + return (ret); + + /* + * Walk a cursor through the key/data pairs, deleting as we go. Set + * the DB_DBT_USERMEM flag, as this might be a threaded application + * and the flags checking will catch us. We don't actually want the + * keys or data, so request a partial of length 0. + */ + memset(&lkey, 0, sizeof(lkey)); + F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL); + memset(&data, 0, sizeof(data)); + F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL); + + /* Walk through the set of key/data pairs, deleting as we go. 
*/ + if ((ret = dbc->c_get(dbc, key, &data, DB_SET)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + + for (;;) { + if ((ret = dbc->c_del(dbc, 0)) != 0) + goto err; + if ((ret = dbc->c_get(dbc, &lkey, &data, DB_NEXT_DUP)) != 0) { + if (ret == DB_NOTFOUND) { + ret = 0; + break; + } + goto err; + } + } + +err: /* + * Discard the cursor. This will cause the underlying off-page dup + * tree to go away as well as the actual entry on the page. + */ + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); + +} + +static int +__ham_c_writelock(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + DB_LOCK tmp_lock; + int ret; + + /* + * All we need do is acquire the lock and let the off-page + * dup tree do its thing. + */ + if (!STD_LOCKING(dbc)) + return (0); + + hcp = (HASH_CURSOR *)dbc->internal; + if ((hcp->lock.off == LOCK_INVALID || hcp->lock_mode == DB_LOCK_READ)) { + tmp_lock = hcp->lock; + if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) != 0) + return (ret); + if (tmp_lock.off != LOCK_INVALID && + (ret = lock_put(dbc->dbp->dbenv, &tmp_lock)) != 0) + return (ret); + } + return (0); +} + +/* + * __ham_c_chgpg -- + * + * Adjust the cursors after moving an item from one page to another. + * If the old_index is NDX_INVALID, that means that we copied the + * page wholesale and we're leaving indices intact and just changing + * the page number. + * + * PUBLIC: int __ham_c_chgpg + * PUBLIC: __P((DBC *, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); + */ +int +__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index) + DBC *dbc; + db_pgno_t old_pgno, new_pgno; + u_int32_t old_index, new_index; +{ + DB *dbp, *ldbp; + DB_ENV *dbenv; + DB_LSN lsn; + DB_TXN *my_txn; + DBC *cp; + HASH_CURSOR *hcp; + int found, ret; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + + my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; + found = 0; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + + hcp = (HASH_CURSOR *)cp->internal; + if (hcp->pgno == old_pgno) { + if (old_index == NDX_INVALID) { + hcp->pgno = new_pgno; + } else if (hcp->indx == old_index) { + hcp->pgno = new_pgno; + hcp->indx = new_index; + } else + continue; + if (my_txn != NULL && cp->txn != my_txn) + found = 1; + } + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (found != 0 && DB_LOGGING(dbc)) { + if ((ret = __ham_chgpg_log(dbenv, + my_txn, &lsn, 0, dbp->log_fileid, DB_HAM_CHGPG, + old_pgno, new_pgno, old_index, new_index)) != 0) + return (ret); + } + return (0); +} diff --git a/bdb/hash/hash.src b/bdb/hash/hash.src new file mode 100644 index 00000000000..e6ecd11c907 --- /dev/null +++ b/bdb/hash/hash.src @@ -0,0 +1,361 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: hash.src,v 10.24 2000/12/12 17:41:48 bostic Exp $ + */ + +/* + * This is the source file used to create the logging functions for the + * hash package. Each access method (or set of routines wishing to register + * record types with the transaction system) should have a file like this. + * Each type of log record and its parameters is defined. The basic + * format of a record definition is: + * + * BEGIN <RECORD_TYPE> + * ARG|STRING|POINTER <variable name> <variable type> <printf format> + * ... + * END + * ARG the argument is a simple parameter of the type * specified. + * DBT the argument is a DBT (db.h) containing a length and pointer. + * PTR the argument is a pointer to the data type specified; the entire + * type should be logged. + * + * There are a set of shell scripts of the form xxx.sh that generate c + * code and or h files to process these. (This is probably better done + * in a single PERL script, but for now, this works.) + * + * The DB recovery system requires the following three fields appear in + * every record, and will assign them to the per-record-type structures + * as well as making them the first parameters to the appropriate logging + * call. + * rectype: record-type, identifies the structure and log/read call + * txnid: transaction id, a DBT in this implementation + * prev: the last LSN for this transaction + */ + +/* + * Use the argument of PREFIX as the prefix for all record types, + * routines, id numbers, etc. + */ +PREFIX ham + +INCLUDE #include "db_config.h" +INCLUDE +INCLUDE #ifndef NO_SYSTEM_INCLUDES +INCLUDE #include <sys/types.h> +INCLUDE +INCLUDE #include <ctype.h> +INCLUDE #include <errno.h> +INCLUDE #include <string.h> +INCLUDE #endif +INCLUDE +INCLUDE #include "db_int.h" +INCLUDE #include "db_page.h" +INCLUDE #include "db_dispatch.h" +INCLUDE #include "db_am.h" +INCLUDE #include "hash.h" +INCLUDE #include "txn.h" +INCLUDE + +/* + * HASH-insdel: used for hash to insert/delete a pair of entries onto a master + * page. 
The pair might be regular key/data pairs or they might be the + * structures that refer to off page items, duplicates or offpage duplicates. + * opcode - PUTPAIR/DELPAIR + big masks + * fileid - identifies the file referenced + * pgno - page within file + * ndx - index on the page of the item being added (item index) + * pagelsn - lsn on the page before the update + * key - the key being inserted + * data - the data being inserted + */ +BEGIN insdel 21 +ARG opcode u_int32_t lu +ARG fileid int32_t ld +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +DBT key DBT s +DBT data DBT s +END + +/* + * Used to add and remove overflow pages. + * prev_pgno is the previous page that is going to get modified to + * point to this one. If this is the first page in a chain + * then prev_pgno should be PGNO_INVALID. + * new_pgno is the page being allocated. + * next_pgno is the page that follows this one. On allocation, + * this should be PGNO_INVALID. For deletes, it may exist. + * pagelsn is the old lsn on the page. + */ +BEGIN newpage 22 +ARG opcode u_int32_t lu +ARG fileid int32_t ld +ARG prev_pgno db_pgno_t lu +POINTER prevlsn DB_LSN * lu +ARG new_pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +END + +/* + * DEPRECATED in 3.0. + * Superceded by metagroup which allocates a group of new pages. + * + * Splitting requires two types of log messages. The first logs the + * meta-data of the split. + * + * For the meta-data split + * bucket: max_bucket in table before split + * ovflpoint: overflow point before split. + * spares: spares[ovflpoint] before split. + */ +DEPRECATED splitmeta 23 +ARG fileid int32_t ld +ARG bucket u_int32_t lu +ARG ovflpoint u_int32_t lu +ARG spares u_int32_t lu +POINTER metalsn DB_LSN * lu +END + +/* + * Splitting requires two types of log messages. The second logs the + * data on the original page. To redo the split, we have to visit the + * new page (pages) and add the items back on the page if they are not + * yet there. + */ +BEGIN splitdata 24 +ARG fileid int32_t ld +ARG opcode u_int32_t lu +ARG pgno db_pgno_t lu +DBT pageimage DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * HASH-replace: is used for hash to handle partial puts that only + * affect a single master page. + * fileid - identifies the file referenced + * pgno - page within file + * ndx - index on the page of the item being modified (item index) + * pagelsn - lsn on the page before the update + * off - offset in the old item where the new item is going. + * olditem - DBT that describes the part of the item being replaced. + * newitem - DBT of the new item. + * makedup - this was a replacement that made an item a duplicate. + */ +BEGIN replace 25 +ARG fileid int32_t ld +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +ARG off int32_t ld +DBT olditem DBT s +DBT newitem DBT s +ARG makedup u_int32_t lu +END + +/* + * DEPRECATED in 3.0. + * Hash now uses the btree allocation and deletion page routines. + * + * HASH-newpgno: is used to record getting/deleting a new page number. + * This doesn't require much data modification, just modifying the + * meta-data. + * pgno is the page being allocated/freed. + * free_pgno is the next_pgno on the free list. + * old_type was the type of a page being deallocated. + * old_pgno was the next page number before the deallocation. 
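As a rough sketch of how one of these records is written at run time, the helper below logs a replace (partial put) record before the page is touched. Everything prefixed ex_ is hypothetical and not part of the imported source; the sketch assumes only the __ham_replace_log signature shown later in hash_auto.c, the DB_LOGGING/log_fileid usage visible in hash.c above, and the PGNO()/LSN() page accessors from db_page.h.

    /*
     * ex_log_partial_put --
     *	Sketch: write a replace record describing the bytes at "off" in item
     *	"ndx" of "pagep" being overwritten, then stamp the page with the new
     *	LSN so recovery can compare against it.  Illustrative only.
     */
    static int
    ex_log_partial_put(dbc, pagep, ndx, off, olditem, newitem)
    	DBC *dbc;
    	PAGE *pagep;
    	u_int32_t ndx;
    	int32_t off;
    	DBT *olditem, *newitem;
    {
    	DB *dbp;
    	DB_LSN new_lsn;
    	int ret;

    	dbp = dbc->dbp;
    	if (!DB_LOGGING(dbc))
    		return (0);

    	if ((ret = __ham_replace_log(dbp->dbenv, dbc->txn, &new_lsn, 0,
    	    dbp->log_fileid, PGNO(pagep), ndx, &LSN(pagep),
    	    off, olditem, newitem, 0 /* makedup */)) != 0)
    		return (ret);

    	/* The page must carry the LSN of the record that describes it. */
    	LSN(pagep) = new_lsn;
    	return (0);
    }

The final assignment mirrors the write-ahead-logging convention used throughout these routines: the page is stamped with the LSN of the record that describes its update.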
+ */ +DEPRECATED newpgno 26 +ARG opcode u_int32_t lu +ARG fileid int32_t ld +ARG pgno db_pgno_t lu +ARG free_pgno db_pgno_t lu +ARG old_type u_int32_t lu +ARG old_pgno db_pgno_t lu +ARG new_type u_int32_t lu +POINTER pagelsn DB_LSN * lu +POINTER metalsn DB_LSN * lu +END + +/* + * DEPRECATED in 3.0. + * Since we now pre-allocate the contiguous chunk of pages for a doubling, + * there is no big benefit to pre-allocating a few extra pages. It used + * to be that the file was only physically as large as the current bucket, + * so if you were on a doubling of 16K, but were only on the first bucket + * of that 16K, the file was much shorter than it would be at the end of + * the doubling, so we didn't want to force overflow pages at the end of the + * 16K pages. Since we now must allocate the 16K pages (because of sub + * databases), it's not a big deal to tack extra pages on at the end. + * + * ovfl: initialize a set of overflow pages. + */ +DEPRECATED ovfl 27 +ARG fileid int32_t ld +ARG start_pgno db_pgno_t lu +ARG npages u_int32_t lu +ARG free_pgno db_pgno_t lu +ARG ovflpoint u_int32_t lu +POINTER metalsn DB_LSN * lu +END + +/* + * Used when we empty the first page in a bucket and there are pages after + * it. The page after it gets copied into the bucket page (since bucket + * pages have to be in fixed locations). + * pgno: the bucket page + * pagelsn: the old LSN on the bucket page + * next_pgno: the page number of the next page + * nnext_pgno: page after next_pgno (may need to change its prev) + * nnextlsn: the LSN of nnext_pgno. + */ +BEGIN copypage 28 +ARG fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +ARG nnext_pgno db_pgno_t lu +POINTER nnextlsn DB_LSN * lu +DBT page DBT s +END + +/* + * This replaces the old splitmeta operation. It behaves largely the same + * way, but it has enough information so that we can record a group allocation + * which we do now because of sub databases. The number of pages allocated is + * always bucket + 1 pgno is the page number of the first newly allocated + * bucket. + * bucket: Old maximum bucket number. + * pgno: Page allocated to bucket + 1 (first newly allocated page) + * metalsn: Lsn of the meta-data page. + * pagelsn: Lsn of the maximum page allocated. + */ +BEGIN metagroup 29 +ARG fileid int32_t ld +ARG bucket u_int32_t lu +ARG pgno db_pgno_t lu +POINTER metalsn DB_LSN * lu +POINTER pagelsn DB_LSN * lu +END + +/* + * groupalloc + * + * This is used in conjunction with MPOOL_NEW_GROUP when we are creating + * a new database to make sure that we recreate or reclaim free pages + * when we allocate a chunk of contiguous ones during database creation. + * + * pgno: meta-data page number + * metalsn: meta-data lsn + * start_pgno: starting page number + * num: number of allocated pages + */ +DEPRECATED groupalloc1 30 +ARG fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER metalsn DB_LSN * lu +POINTER mmetalsn DB_LSN * lu +ARG start_pgno db_pgno_t lu +ARG num u_int32_t lu +END + +DEPRECATED groupalloc2 31 +ARG fileid int32_t ld +POINTER meta_lsn DB_LSN * lu +POINTER alloc_lsn DB_LSN * lu +ARG start_pgno db_pgno_t lu +ARG num u_int32_t lu +ARG free db_pgno_t lu +END + +BEGIN groupalloc 32 +ARG fileid int32_t ld +POINTER meta_lsn DB_LSN * lu +ARG start_pgno db_pgno_t lu +ARG num u_int32_t lu +ARG free db_pgno_t lu +END + +/* + * Records for backing out cursor adjustment. + * curadj - added or deleted a record or a dup + * within a record. 
+ * pgno - page that was affected + * indx - index of the record affected. + * len - if a dup, its length. + * dup_off - if a dup, its offset. + * add - 1 if add, 0 if delete. + * is_dup - 1 if dup, 0 otherwise. + * order - order assigned to this deleted record or dup. + * + * chgpg - removed a page, moving the records to a new page + * mode - CHGPG: page was deleted or records moved to a new page. + * - SPLIT: we split a bucket. + * - DUP: we converted to off-page duplicates. + * old_pgno, new_pgno - old and new page numbers. + * old_index, new_index - old and new index numbers, NDX_INVALID if + * it affects all records on the page. + */ +BEGIN curadj 33 +ARG fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG len u_int32_t lu +ARG dup_off u_int32_t lu +ARG add int ld +ARG is_dup int ld +ARG order u_int32_t lu +END + +BEGIN chgpg 34 +ARG fileid int32_t ld +ARG mode db_ham_mode ld +ARG old_pgno db_pgno_t lu +ARG new_pgno db_pgno_t lu +ARG old_indx u_int32_t lu +ARG new_indx u_int32_t lu +END + diff --git a/bdb/hash/hash_auto.c b/bdb/hash/hash_auto.c new file mode 100644 index 00000000000..b6faf4f5645 --- /dev/null +++ b/bdb/hash/hash_auto.c @@ -0,0 +1,2023 @@ +/* Do not edit: automatically built by gen_rec.awk. */ +#include "db_config.h" + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_dispatch.h" +#include "db_am.h" +#include "hash.h" +#include "txn.h" + +int +__ham_insdel_log(dbenv, txnid, ret_lsnp, flags, + opcode, fileid, pgno, ndx, pagelsn, key, + data) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN * pagelsn; + const DBT *key; + const DBT *data; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_insdel; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(ndx) + + sizeof(*pagelsn) + + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) + + sizeof(u_int32_t) + (data == NULL ? 
0 : data->size); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &ndx, sizeof(ndx)); + bp += sizeof(ndx); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + if (key == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &key->size, sizeof(key->size)); + bp += sizeof(key->size); + memcpy(bp, key->data, key->size); + bp += key->size; + } + if (data == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &data->size, sizeof(data->size)); + bp += sizeof(data->size); + memcpy(bp, data->data, data->size); + bp += data->size; + } + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_insdel_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_insdel_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_insdel_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_insdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tndx: %lu\n", (u_long)argp->ndx); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tkey: "); + for (i = 0; i < argp->key.size; i++) { + ch = ((u_int8_t *)argp->key.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tdata: "); + for (i = 0; i < argp->data.size; i++) { + ch = ((u_int8_t *)argp->data.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_insdel_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_insdel_args **argpp; +{ + __ham_insdel_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_insdel_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, 
sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->ndx, bp, sizeof(argp->ndx)); + bp += sizeof(argp->ndx); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memset(&argp->key, 0, sizeof(argp->key)); + memcpy(&argp->key.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->key.data = bp; + bp += argp->key.size; + memset(&argp->data, 0, sizeof(argp->data)); + memcpy(&argp->data.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->data.data = bp; + bp += argp->data.size; + *argpp = argp; + return (0); +} + +int +__ham_newpage_log(dbenv, txnid, ret_lsnp, flags, + opcode, fileid, prev_pgno, prevlsn, new_pgno, pagelsn, + next_pgno, nextlsn) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + int32_t fileid; + db_pgno_t prev_pgno; + DB_LSN * prevlsn; + db_pgno_t new_pgno; + DB_LSN * pagelsn; + db_pgno_t next_pgno; + DB_LSN * nextlsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_newpage; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(prev_pgno) + + sizeof(*prevlsn) + + sizeof(new_pgno) + + sizeof(*pagelsn) + + sizeof(next_pgno) + + sizeof(*nextlsn); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &prev_pgno, sizeof(prev_pgno)); + bp += sizeof(prev_pgno); + if (prevlsn != NULL) + memcpy(bp, prevlsn, sizeof(*prevlsn)); + else + memset(bp, 0, sizeof(*prevlsn)); + bp += sizeof(*prevlsn); + memcpy(bp, &new_pgno, sizeof(new_pgno)); + bp += sizeof(new_pgno); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + memcpy(bp, &next_pgno, sizeof(next_pgno)); + bp += sizeof(next_pgno); + if (nextlsn != NULL) + memcpy(bp, nextlsn, sizeof(*nextlsn)); + else + memset(bp, 0, sizeof(*nextlsn)); + bp += sizeof(*nextlsn); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_newpage_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_newpage_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_newpage_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_newpage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + 
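	/*
	 * The remaining fields mirror the newpage record defined in hash.src:
	 * the file id, then the previous, new and next page numbers, each
	 * paired with the LSN that page carried before the operation.  These
	 * _print routines are the ones __ham_init_print registers at the end
	 * of this file so that a log dump can render every hash record type.
	 */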
printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno); + printf("\tprevlsn: [%lu][%lu]\n", + (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); + printf("\tnew_pgno: %lu\n", (u_long)argp->new_pgno); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); + printf("\tnextlsn: [%lu][%lu]\n", + (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_newpage_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_newpage_args **argpp; +{ + __ham_newpage_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_newpage_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->prev_pgno, bp, sizeof(argp->prev_pgno)); + bp += sizeof(argp->prev_pgno); + memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn)); + bp += sizeof(argp->prevlsn); + memcpy(&argp->new_pgno, bp, sizeof(argp->new_pgno)); + bp += sizeof(argp->new_pgno); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno)); + bp += sizeof(argp->next_pgno); + memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); + bp += sizeof(argp->nextlsn); + *argpp = argp; + return (0); +} + +int +__ham_splitmeta_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_splitmeta_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_splitmeta_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_splitmeta: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tbucket: %lu\n", (u_long)argp->bucket); + printf("\tovflpoint: %lu\n", (u_long)argp->ovflpoint); + printf("\tspares: %lu\n", (u_long)argp->spares); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_splitmeta_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_splitmeta_args **argpp; +{ + __ham_splitmeta_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_splitmeta_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->bucket, bp, 
sizeof(argp->bucket)); + bp += sizeof(argp->bucket); + memcpy(&argp->ovflpoint, bp, sizeof(argp->ovflpoint)); + bp += sizeof(argp->ovflpoint); + memcpy(&argp->spares, bp, sizeof(argp->spares)); + bp += sizeof(argp->spares); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + *argpp = argp; + return (0); +} + +int +__ham_splitdata_log(dbenv, txnid, ret_lsnp, flags, + fileid, opcode, pgno, pageimage, pagelsn) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + u_int32_t opcode; + db_pgno_t pgno; + const DBT *pageimage; + DB_LSN * pagelsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_splitdata; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(opcode) + + sizeof(pgno) + + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size) + + sizeof(*pagelsn); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (pageimage == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &pageimage->size, sizeof(pageimage->size)); + bp += sizeof(pageimage->size); + memcpy(bp, pageimage->data, pageimage->size); + bp += pageimage->size; + } + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_splitdata_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_splitdata_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_splitdata_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_splitdata: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tpageimage: "); + for (i = 0; i < argp->pageimage.size; i++) { + ch = ((u_int8_t *)argp->pageimage.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int 
+__ham_splitdata_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_splitdata_args **argpp; +{ + __ham_splitdata_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_splitdata_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memset(&argp->pageimage, 0, sizeof(argp->pageimage)); + memcpy(&argp->pageimage.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->pageimage.data = bp; + bp += argp->pageimage.size; + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + *argpp = argp; + return (0); +} + +int +__ham_replace_log(dbenv, txnid, ret_lsnp, flags, + fileid, pgno, ndx, pagelsn, off, olditem, + newitem, makedup) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN * pagelsn; + int32_t off; + const DBT *olditem; + const DBT *newitem; + u_int32_t makedup; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_replace; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(ndx) + + sizeof(*pagelsn) + + sizeof(off) + + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size) + + sizeof(u_int32_t) + (newitem == NULL ? 
0 : newitem->size) + + sizeof(makedup); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &ndx, sizeof(ndx)); + bp += sizeof(ndx); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + memcpy(bp, &off, sizeof(off)); + bp += sizeof(off); + if (olditem == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &olditem->size, sizeof(olditem->size)); + bp += sizeof(olditem->size); + memcpy(bp, olditem->data, olditem->size); + bp += olditem->size; + } + if (newitem == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &newitem->size, sizeof(newitem->size)); + bp += sizeof(newitem->size); + memcpy(bp, newitem->data, newitem->size); + bp += newitem->size; + } + memcpy(bp, &makedup, sizeof(makedup)); + bp += sizeof(makedup); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_replace_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_replace_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_replace_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_replace: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tndx: %lu\n", (u_long)argp->ndx); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\toff: %ld\n", (long)argp->off); + printf("\tolditem: "); + for (i = 0; i < argp->olditem.size; i++) { + ch = ((u_int8_t *)argp->olditem.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tnewitem: "); + for (i = 0; i < argp->newitem.size; i++) { + ch = ((u_int8_t *)argp->newitem.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tmakedup: %lu\n", (u_long)argp->makedup); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_replace_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_replace_args **argpp; +{ + __ham_replace_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_replace_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + 
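	/*
	 * Everything unmarshalled so far is the header shared by every log
	 * record type: the record type, the transaction id and the previous
	 * LSN for that transaction (see the notes at the top of hash.src).
	 * The fields below follow the order of the replace definition there.
	 */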
memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->ndx, bp, sizeof(argp->ndx)); + bp += sizeof(argp->ndx); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->off, bp, sizeof(argp->off)); + bp += sizeof(argp->off); + memset(&argp->olditem, 0, sizeof(argp->olditem)); + memcpy(&argp->olditem.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->olditem.data = bp; + bp += argp->olditem.size; + memset(&argp->newitem, 0, sizeof(argp->newitem)); + memcpy(&argp->newitem.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->newitem.data = bp; + bp += argp->newitem.size; + memcpy(&argp->makedup, bp, sizeof(argp->makedup)); + bp += sizeof(argp->makedup); + *argpp = argp; + return (0); +} + +int +__ham_newpgno_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_newpgno_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_newpgno_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_newpgno: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno); + printf("\told_type: %lu\n", (u_long)argp->old_type); + printf("\told_pgno: %lu\n", (u_long)argp->old_pgno); + printf("\tnew_type: %lu\n", (u_long)argp->new_type); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_newpgno_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_newpgno_args **argpp; +{ + __ham_newpgno_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_newpgno_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno)); + bp += sizeof(argp->free_pgno); + memcpy(&argp->old_type, bp, sizeof(argp->old_type)); + bp += sizeof(argp->old_type); + memcpy(&argp->old_pgno, bp, sizeof(argp->old_pgno)); + bp += sizeof(argp->old_pgno); + memcpy(&argp->new_type, bp, sizeof(argp->new_type)); + bp += sizeof(argp->new_type); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + *argpp = argp; + return (0); +} + +int +__ham_ovfl_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV 
*dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_ovfl_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_ovfl_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_ovfl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); + printf("\tnpages: %lu\n", (u_long)argp->npages); + printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno); + printf("\tovflpoint: %lu\n", (u_long)argp->ovflpoint); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_ovfl_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_ovfl_args **argpp; +{ + __ham_ovfl_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_ovfl_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->start_pgno, bp, sizeof(argp->start_pgno)); + bp += sizeof(argp->start_pgno); + memcpy(&argp->npages, bp, sizeof(argp->npages)); + bp += sizeof(argp->npages); + memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno)); + bp += sizeof(argp->free_pgno); + memcpy(&argp->ovflpoint, bp, sizeof(argp->ovflpoint)); + bp += sizeof(argp->ovflpoint); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + *argpp = argp; + return (0); +} + +int +__ham_copypage_log(dbenv, txnid, ret_lsnp, flags, + fileid, pgno, pagelsn, next_pgno, nextlsn, nnext_pgno, + nnextlsn, page) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + db_pgno_t pgno; + DB_LSN * pagelsn; + db_pgno_t next_pgno; + DB_LSN * nextlsn; + db_pgno_t nnext_pgno; + DB_LSN * nnextlsn; + const DBT *page; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_copypage; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*pagelsn) + + sizeof(next_pgno) + + sizeof(*nextlsn) + + sizeof(nnext_pgno) + + sizeof(*nnextlsn) + + sizeof(u_int32_t) + (page == NULL ? 
0 : page->size); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + memcpy(bp, &next_pgno, sizeof(next_pgno)); + bp += sizeof(next_pgno); + if (nextlsn != NULL) + memcpy(bp, nextlsn, sizeof(*nextlsn)); + else + memset(bp, 0, sizeof(*nextlsn)); + bp += sizeof(*nextlsn); + memcpy(bp, &nnext_pgno, sizeof(nnext_pgno)); + bp += sizeof(nnext_pgno); + if (nnextlsn != NULL) + memcpy(bp, nnextlsn, sizeof(*nnextlsn)); + else + memset(bp, 0, sizeof(*nnextlsn)); + bp += sizeof(*nnextlsn); + if (page == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &page->size, sizeof(page->size)); + bp += sizeof(page->size); + memcpy(bp, page->data, page->size); + bp += page->size; + } + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_copypage_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_copypage_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_copypage_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_copypage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); + printf("\tnextlsn: [%lu][%lu]\n", + (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); + printf("\tnnext_pgno: %lu\n", (u_long)argp->nnext_pgno); + printf("\tnnextlsn: [%lu][%lu]\n", + (u_long)argp->nnextlsn.file, (u_long)argp->nnextlsn.offset); + printf("\tpage: "); + for (i = 0; i < argp->page.size; i++) { + ch = ((u_int8_t *)argp->page.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_copypage_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_copypage_args **argpp; +{ + __ham_copypage_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_copypage_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, 
sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno)); + bp += sizeof(argp->next_pgno); + memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); + bp += sizeof(argp->nextlsn); + memcpy(&argp->nnext_pgno, bp, sizeof(argp->nnext_pgno)); + bp += sizeof(argp->nnext_pgno); + memcpy(&argp->nnextlsn, bp, sizeof(argp->nnextlsn)); + bp += sizeof(argp->nnextlsn); + memset(&argp->page, 0, sizeof(argp->page)); + memcpy(&argp->page.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->page.data = bp; + bp += argp->page.size; + *argpp = argp; + return (0); +} + +int +__ham_metagroup_log(dbenv, txnid, ret_lsnp, flags, + fileid, bucket, pgno, metalsn, pagelsn) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + u_int32_t bucket; + db_pgno_t pgno; + DB_LSN * metalsn; + DB_LSN * pagelsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_metagroup; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(bucket) + + sizeof(pgno) + + sizeof(*metalsn) + + sizeof(*pagelsn); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &bucket, sizeof(bucket)); + bp += sizeof(bucket); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (metalsn != NULL) + memcpy(bp, metalsn, sizeof(*metalsn)); + else + memset(bp, 0, sizeof(*metalsn)); + bp += sizeof(*metalsn); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_metagroup_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_metagroup_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_metagroup_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_metagroup: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tbucket: %lu\n", (u_long)argp->bucket); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int 
+__ham_metagroup_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_metagroup_args **argpp; +{ + __ham_metagroup_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_metagroup_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->bucket, bp, sizeof(argp->bucket)); + bp += sizeof(argp->bucket); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + *argpp = argp; + return (0); +} + +int +__ham_groupalloc1_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_groupalloc1_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_groupalloc1_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_groupalloc1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\tmmetalsn: [%lu][%lu]\n", + (u_long)argp->mmetalsn.file, (u_long)argp->mmetalsn.offset); + printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); + printf("\tnum: %lu\n", (u_long)argp->num); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_groupalloc1_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_groupalloc1_args **argpp; +{ + __ham_groupalloc1_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_groupalloc1_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + memcpy(&argp->mmetalsn, bp, sizeof(argp->mmetalsn)); + bp += sizeof(argp->mmetalsn); + memcpy(&argp->start_pgno, bp, sizeof(argp->start_pgno)); + bp += sizeof(argp->start_pgno); + memcpy(&argp->num, bp, sizeof(argp->num)); + bp += sizeof(argp->num); + *argpp = argp; + return (0); +} + +int +__ham_groupalloc2_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_groupalloc2_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = 
__ham_groupalloc2_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_groupalloc2: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tmeta_lsn: [%lu][%lu]\n", + (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); + printf("\talloc_lsn: [%lu][%lu]\n", + (u_long)argp->alloc_lsn.file, (u_long)argp->alloc_lsn.offset); + printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); + printf("\tnum: %lu\n", (u_long)argp->num); + printf("\tfree: %lu\n", (u_long)argp->free); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_groupalloc2_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_groupalloc2_args **argpp; +{ + __ham_groupalloc2_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_groupalloc2_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn)); + bp += sizeof(argp->meta_lsn); + memcpy(&argp->alloc_lsn, bp, sizeof(argp->alloc_lsn)); + bp += sizeof(argp->alloc_lsn); + memcpy(&argp->start_pgno, bp, sizeof(argp->start_pgno)); + bp += sizeof(argp->start_pgno); + memcpy(&argp->num, bp, sizeof(argp->num)); + bp += sizeof(argp->num); + memcpy(&argp->free, bp, sizeof(argp->free)); + bp += sizeof(argp->free); + *argpp = argp; + return (0); +} + +int +__ham_groupalloc_log(dbenv, txnid, ret_lsnp, flags, + fileid, meta_lsn, start_pgno, num, free) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + DB_LSN * meta_lsn; + db_pgno_t start_pgno; + u_int32_t num; + db_pgno_t free; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_groupalloc; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 
0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(*meta_lsn) + + sizeof(start_pgno) + + sizeof(num) + + sizeof(free); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + if (meta_lsn != NULL) + memcpy(bp, meta_lsn, sizeof(*meta_lsn)); + else + memset(bp, 0, sizeof(*meta_lsn)); + bp += sizeof(*meta_lsn); + memcpy(bp, &start_pgno, sizeof(start_pgno)); + bp += sizeof(start_pgno); + memcpy(bp, &num, sizeof(num)); + bp += sizeof(num); + memcpy(bp, &free, sizeof(free)); + bp += sizeof(free); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_groupalloc_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_groupalloc_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_groupalloc_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_groupalloc: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tmeta_lsn: [%lu][%lu]\n", + (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); + printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); + printf("\tnum: %lu\n", (u_long)argp->num); + printf("\tfree: %lu\n", (u_long)argp->free); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_groupalloc_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_groupalloc_args **argpp; +{ + __ham_groupalloc_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_groupalloc_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn)); + bp += sizeof(argp->meta_lsn); + memcpy(&argp->start_pgno, bp, sizeof(argp->start_pgno)); + bp += sizeof(argp->start_pgno); + memcpy(&argp->num, bp, sizeof(argp->num)); + bp += sizeof(argp->num); + memcpy(&argp->free, bp, sizeof(argp->free)); + bp += sizeof(argp->free); + *argpp = argp; + return (0); +} + +int +__ham_curadj_log(dbenv, txnid, ret_lsnp, flags, + fileid, pgno, indx, len, dup_off, add, + is_dup, order) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + u_int32_t len; + u_int32_t dup_off; + int add; + int is_dup; + u_int32_t order; +{ 
+ DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_curadj; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(indx) + + sizeof(len) + + sizeof(dup_off) + + sizeof(add) + + sizeof(is_dup) + + sizeof(order); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &indx, sizeof(indx)); + bp += sizeof(indx); + memcpy(bp, &len, sizeof(len)); + bp += sizeof(len); + memcpy(bp, &dup_off, sizeof(dup_off)); + bp += sizeof(dup_off); + memcpy(bp, &add, sizeof(add)); + bp += sizeof(add); + memcpy(bp, &is_dup, sizeof(is_dup)); + bp += sizeof(is_dup); + memcpy(bp, &order, sizeof(order)); + bp += sizeof(order); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_curadj_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_curadj_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_curadj_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_curadj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tindx: %lu\n", (u_long)argp->indx); + printf("\tlen: %lu\n", (u_long)argp->len); + printf("\tdup_off: %lu\n", (u_long)argp->dup_off); + printf("\tadd: %ld\n", (long)argp->add); + printf("\tis_dup: %ld\n", (long)argp->is_dup); + printf("\torder: %lu\n", (u_long)argp->order); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_curadj_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_curadj_args **argpp; +{ + __ham_curadj_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_curadj_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->indx, bp, sizeof(argp->indx)); + bp += sizeof(argp->indx); + memcpy(&argp->len, bp, sizeof(argp->len)); + bp += 
sizeof(argp->len); + memcpy(&argp->dup_off, bp, sizeof(argp->dup_off)); + bp += sizeof(argp->dup_off); + memcpy(&argp->add, bp, sizeof(argp->add)); + bp += sizeof(argp->add); + memcpy(&argp->is_dup, bp, sizeof(argp->is_dup)); + bp += sizeof(argp->is_dup); + memcpy(&argp->order, bp, sizeof(argp->order)); + bp += sizeof(argp->order); + *argpp = argp; + return (0); +} + +int +__ham_chgpg_log(dbenv, txnid, ret_lsnp, flags, + fileid, mode, old_pgno, new_pgno, old_indx, new_indx) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + int32_t fileid; + db_ham_mode mode; + db_pgno_t old_pgno; + db_pgno_t new_pgno; + u_int32_t old_indx; + u_int32_t new_indx; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_chgpg; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(mode) + + sizeof(old_pgno) + + sizeof(new_pgno) + + sizeof(old_indx) + + sizeof(new_indx); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &mode, sizeof(mode)); + bp += sizeof(mode); + memcpy(bp, &old_pgno, sizeof(old_pgno)); + bp += sizeof(old_pgno); + memcpy(bp, &new_pgno, sizeof(new_pgno)); + bp += sizeof(new_pgno); + memcpy(bp, &old_indx, sizeof(old_indx)); + bp += sizeof(old_indx); + memcpy(bp, &new_indx, sizeof(new_indx)); + bp += sizeof(new_indx); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__ham_chgpg_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __ham_chgpg_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __ham_chgpg_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_chgpg: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tmode: %ld\n", (long)argp->mode); + printf("\told_pgno: %lu\n", (u_long)argp->old_pgno); + printf("\tnew_pgno: %lu\n", (u_long)argp->new_pgno); + printf("\told_indx: %lu\n", (u_long)argp->old_indx); + printf("\tnew_indx: %lu\n", (u_long)argp->new_indx); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__ham_chgpg_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __ham_chgpg_args **argpp; +{ + __ham_chgpg_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__ham_chgpg_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, 
sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->mode, bp, sizeof(argp->mode)); + bp += sizeof(argp->mode); + memcpy(&argp->old_pgno, bp, sizeof(argp->old_pgno)); + bp += sizeof(argp->old_pgno); + memcpy(&argp->new_pgno, bp, sizeof(argp->new_pgno)); + bp += sizeof(argp->new_pgno); + memcpy(&argp->old_indx, bp, sizeof(argp->old_indx)); + bp += sizeof(argp->old_indx); + memcpy(&argp->new_indx, bp, sizeof(argp->new_indx)); + bp += sizeof(argp->new_indx); + *argpp = argp; + return (0); +} + +int +__ham_init_print(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __ham_insdel_print, DB_ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpage_print, DB_ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitmeta_print, DB_ham_splitmeta)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitdata_print, DB_ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_replace_print, DB_ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpgno_print, DB_ham_newpgno)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_ovfl_print, DB_ham_ovfl)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_copypage_print, DB_ham_copypage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_metagroup_print, DB_ham_metagroup)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_groupalloc1_print, DB_ham_groupalloc1)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_groupalloc2_print, DB_ham_groupalloc2)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_groupalloc_print, DB_ham_groupalloc)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_curadj_print, DB_ham_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_chgpg_print, DB_ham_chgpg)) != 0) + return (ret); + return (0); +} + +int +__ham_init_recover(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __ham_insdel_recover, DB_ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpage_recover, DB_ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __deprecated_recover, DB_ham_splitmeta)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitdata_recover, DB_ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_replace_recover, DB_ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __deprecated_recover, DB_ham_newpgno)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __deprecated_recover, DB_ham_ovfl)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_copypage_recover, DB_ham_copypage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_metagroup_recover, DB_ham_metagroup)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __deprecated_recover, DB_ham_groupalloc1)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __deprecated_recover, DB_ham_groupalloc2)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_groupalloc_recover, DB_ham_groupalloc)) != 0) + 
return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_curadj_recover, DB_ham_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_chgpg_recover, DB_ham_chgpg)) != 0) + return (ret); + return (0); +} + diff --git a/bdb/hash/hash_conv.c b/bdb/hash/hash_conv.c new file mode 100644 index 00000000000..30d17a6164d --- /dev/null +++ b/bdb/hash/hash_conv.c @@ -0,0 +1,112 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_conv.c,v 11.5 2000/03/31 00:30:32 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_swap.h" +#include "hash.h" + +/* + * __ham_pgin -- + * Convert host-specific page layout from the host-independent format + * stored on disk. + * + * PUBLIC: int __ham_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); + */ +int +__ham_pgin(dbenv, pg, pp, cookie) + DB_ENV *dbenv; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + h = pp; + pginfo = (DB_PGINFO *)cookie->data; + + /* + * The hash access method does blind reads of pages, causing them + * to be created. If the type field isn't set it's one of them, + * initialize the rest of the page and return. + */ + if (h->type != P_HASHMETA && h->pgno == PGNO_INVALID) { + P_INIT(pp, pginfo->db_pagesize, + pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + return (0); + } + + if (!pginfo->needswap) + return (0); + + return (h->type == P_HASHMETA ? __ham_mswap(pp) : + __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 1)); +} + +/* + * __ham_pgout -- + * Convert host-specific page layout to the host-independent format + * stored on disk. + * + * PUBLIC: int __ham_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); + */ +int +__ham_pgout(dbenv, pg, pp, cookie) + DB_ENV *dbenv; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + pginfo = (DB_PGINFO *)cookie->data; + if (!pginfo->needswap) + return (0); + + h = pp; + return (h->type == P_HASHMETA ? __ham_mswap(pp) : + __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 0)); +} + +/* + * __ham_mswap -- + * Swap the bytes on the hash metadata page. + * + * PUBLIC: int __ham_mswap __P((void *)); + */ +int +__ham_mswap(pg) + void *pg; +{ + u_int8_t *p; + int i; + + __db_metaswap(pg); + + p = (u_int8_t *)pg + sizeof(DBMETA); + + SWAP32(p); /* max_bucket */ + SWAP32(p); /* high_mask */ + SWAP32(p); /* low_mask */ + SWAP32(p); /* ffactor */ + SWAP32(p); /* nelem */ + SWAP32(p); /* h_charkey */ + for (i = 0; i < NCACHED; ++i) + SWAP32(p); /* spares */ + return (0); +} diff --git a/bdb/hash/hash_dup.c b/bdb/hash/hash_dup.c new file mode 100644 index 00000000000..f5fbf4f472f --- /dev/null +++ b/bdb/hash/hash_dup.c @@ -0,0 +1,805 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_dup.c,v 11.49 2000/12/21 21:54:35 margo Exp $"; +#endif /* not lint */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Manipulation of duplicates for the hash package. + * + * ROUTINES: + * + * External + * __add_dup + * Internal + */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" +#include "btree.h" +#include "txn.h" + +static int __ham_check_move __P((DBC *, u_int32_t)); +static int __ham_dcursor __P((DBC *, db_pgno_t, u_int32_t)); + +/* + * Called from hash_access to add a duplicate key. nval is the new + * value that we want to add. The flags correspond to the flag values + * to cursor_put indicating where to add the new element. + * There are 4 cases. + * Case 1: The existing duplicate set already resides on a separate page. + * We return and let the common code handle this. + * Case 2: The element is small enough to just be added to the existing set. + * Case 3: The element is large enough to be a big item, so we're going to + * have to push the set onto a new page. + * Case 4: The element is large enough to push the duplicate set onto a + * separate page. + * + * PUBLIC: int __ham_add_dup __P((DBC *, DBT *, u_int32_t, db_pgno_t *)); + */ +int +__ham_add_dup(dbc, nval, flags, pgnop) + DBC *dbc; + DBT *nval; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT pval, tmp_val; + u_int32_t add_bytes, new_size; + int cmp, ret; + u_int8_t *hk; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + DB_ASSERT(flags != DB_CURRENT); + + add_bytes = nval->size + + (F_ISSET(nval, DB_DBT_PARTIAL) ? nval->doff : 0); + add_bytes = DUP_SIZE(add_bytes); + + if ((ret = __ham_check_move(dbc, add_bytes)) != 0) + return (ret); + + /* + * Check if resulting duplicate set is going to need to go + * onto a separate duplicate page. If so, convert the + * duplicate set and add the new one. After conversion, + * hcp->dndx is the first free ndx or the index of the + * current pointer into the duplicate set. 
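+ * + * For reference, an on-page duplicate set (H_DUPLICATE) is a byte string + * of the form + * + * <len1><data1><len1><len2><data2><len2>... + * + * where each length is a db_indx_t, so an element of len bytes consumes + * DUP_SIZE(len), i.e. len + 2 * sizeof(db_indx_t) bytes; keeping the + * length on both sides of the data lets the set be walked in either + * direction.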
+ */ + hk = H_PAIRDATA(hcp->page, hcp->indx); + /* Add the len bytes to the current singleton. */ + if (HPAGE_PTYPE(hk) != H_DUPLICATE) + add_bytes += DUP_SIZE(0); + new_size = + LEN_HKEYDATA(hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) + + add_bytes; + + /* + * We convert to off-page duplicates if the item is a big item, + * the addition of the new item will make the set large, or + * if there isn't enough room on this page to add the next item. + */ + if (HPAGE_PTYPE(hk) != H_OFFDUP && + (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) || + add_bytes > P_FREESPACE(hcp->page))) { + + if ((ret = __ham_dup_convert(dbc)) != 0) + return (ret); + return (hcp->opd->c_am_put(hcp->opd, + NULL, nval, flags, NULL)); + } + + /* There are two separate cases here: on page and off page. */ + if (HPAGE_PTYPE(hk) != H_OFFDUP) { + if (HPAGE_PTYPE(hk) != H_DUPLICATE) { + pval.flags = 0; + pval.data = HKEYDATA_DATA(hk); + pval.size = LEN_HDATA(hcp->page, dbp->pgsize, + hcp->indx); + if ((ret = __ham_make_dup(dbp->dbenv, + &pval, &tmp_val, &dbc->rdata.data, + &dbc->rdata.ulen)) != 0 || (ret = + __ham_replpair(dbc, &tmp_val, 1)) != 0) + return (ret); + hk = H_PAIRDATA(hcp->page, hcp->indx); + HPAGE_PTYPE(hk) = H_DUPLICATE; + + /* + * Update the cursor position since we now are in + * duplicates. + */ + F_SET(hcp, H_ISDUP); + hcp->dup_off = 0; + hcp->dup_len = pval.size; + hcp->dup_tlen = DUP_SIZE(hcp->dup_len); + } + + /* Now make the new entry a duplicate. */ + if ((ret = __ham_make_dup(dbp->dbenv, nval, + &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) + return (ret); + + tmp_val.dlen = 0; + switch (flags) { /* On page. */ + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NODUPDATA: + if (dbp->dup_compare != NULL) { + __ham_dsearch(dbc, nval, &tmp_val.doff, &cmp); + + /* dup dups are not supported w/ sorted dups */ + if (cmp == 0) + return (__db_duperr(dbp, flags)); + } else { + hcp->dup_tlen = LEN_HDATA(hcp->page, + dbp->pgsize, hcp->indx); + hcp->dup_len = nval->size; + F_SET(hcp, H_ISDUP); + if (flags == DB_KEYFIRST) + hcp->dup_off = tmp_val.doff = 0; + else + hcp->dup_off = + tmp_val.doff = hcp->dup_tlen; + } + break; + case DB_BEFORE: + tmp_val.doff = hcp->dup_off; + break; + case DB_AFTER: + tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); + break; + } + /* Add the duplicate. */ + ret = __ham_replpair(dbc, &tmp_val, 0); + if (ret == 0) + ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY); + + if (ret != 0) + return (ret); + + /* Now, update the cursor if necessary. */ + switch (flags) { + case DB_AFTER: + hcp->dup_off += DUP_SIZE(hcp->dup_len); + hcp->dup_len = nval->size; + hcp->dup_tlen += DUP_SIZE(nval->size); + break; + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_BEFORE: + hcp->dup_tlen += DUP_SIZE(nval->size); + hcp->dup_len = nval->size; + break; + } + ret = __ham_c_update(dbc, tmp_val.size, 1, 1); + return (ret); + } + + /* + * If we get here, then we're on duplicate pages; set pgnop and + * return so the common code can handle it. + */ + memcpy(pgnop, + HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), sizeof(db_pgno_t)); + + return (ret); +} + +/* + * Convert an on-page set of duplicates to an offpage set of duplicates. 
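+ * The original entry may be a plain H_KEYDATA item, an H_OFFPAGE + * reference or an existing on-page H_DUPLICATE set; in each case its + * contents are reinserted on a newly allocated duplicate page (P_LDUP, + * or P_LRECNO when no duplicate comparison function is set) and the + * on-page entry is then replaced by an H_OFFDUP pointing at that page.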
+ * + * PUBLIC: int __ham_dup_convert __P((DBC *)); + */ +int +__ham_dup_convert(dbc) + DBC *dbc; +{ + DB *dbp; + DBC **hcs; + DB_LSN lsn; + PAGE *dp; + HASH_CURSOR *hcp; + BOVERFLOW bo; + DBT dbt; + HOFFPAGE ho; + db_indx_t i, len, off; + int c, ret, t_ret; + u_int8_t *p, *pend; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + /* + * Create a new page for the duplicates. + */ + if ((ret = __db_new(dbc, + dbp->dup_compare == NULL ? P_LRECNO : P_LDUP, &dp)) != 0) + return (ret); + P_INIT(dp, dbp->pgsize, + dp->pgno, PGNO_INVALID, PGNO_INVALID, LEAFLEVEL, TYPE(dp)); + + /* + * Get the list of cursors that may need to be updated. + */ + if ((ret = __ham_get_clist(dbp, + PGNO(hcp->page), (u_int32_t)hcp->indx, &hcs)) != 0) + return (ret); + + /* + * Now put the duplicates onto the new page. + */ + dbt.flags = 0; + switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) { + case H_KEYDATA: + /* Simple case, one key on page; move it to dup page. */ + dbt.size = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + ret = __db_pitem(dbc, + dp, 0, BKEYDATA_SIZE(dbt.size), NULL, &dbt); + goto finish; + case H_OFFPAGE: + /* Simple case, one key on page; move it to dup page. */ + memcpy(&ho, + P_ENTRY(hcp->page, H_DATAINDEX(hcp->indx)), HOFFPAGE_SIZE); + UMRW_SET(bo.unused1); + B_TSET(bo.type, ho.type, 0); + UMRW_SET(bo.unused2); + bo.pgno = ho.pgno; + bo.tlen = ho.tlen; + dbt.size = BOVERFLOW_SIZE; + dbt.data = &bo; + + ret = __db_pitem(dbc, dp, 0, dbt.size, &dbt, NULL); + +finish: if (ret == 0) { + memp_fset(dbp->mpf, dp, DB_MPOOL_DIRTY); + /* + * Update any other cursors + */ + if (hcs != NULL && DB_LOGGING(dbc) + && IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = __ham_chgpg_log(dbp->dbenv, + dbc->txn, &lsn, 0, dbp->log_fileid, + DB_HAM_DUP, PGNO(hcp->page), + PGNO(dp), hcp->indx, 0)) != 0) + break; + } + for (c = 0; hcs != NULL && hcs[c] != NULL; c++) + if ((ret = __ham_dcursor(hcs[c], + PGNO(dp), 0)) != 0) + break; + + } + break; + + case H_DUPLICATE: + p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + pend = p + + LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + + /* + * We need to maintain the duplicate cursor position. + * Keep track of where we are in the duplicate set via + * the offset, and when it matches the one in the cursor, + * set the off-page duplicate cursor index to the current + * index. + */ + for (off = 0, i = 0; p < pend; i++) { + memcpy(&len, p, sizeof(db_indx_t)); + dbt.size = len; + p += sizeof(db_indx_t); + dbt.data = p; + p += len + sizeof(db_indx_t); + if ((ret = __db_pitem(dbc, dp, + i, BKEYDATA_SIZE(dbt.size), NULL, &dbt)) != 0) + break; + /* + * Update any other cursors + */ + for (c = 0; hcs != NULL && hcs[c] != NULL; c++) + if (((HASH_CURSOR *)(hcs[c]->internal))->dup_off + == off && (ret = __ham_dcursor(hcs[c], + PGNO(dp), i)) != 0) + goto out; + off += len + 2 * sizeof(db_indx_t); + } +out: break; + + default: + ret = __db_pgfmt(dbp, (u_long)hcp->pgno); + break; + } + if (ret == 0) { + /* + * Now attach this to the source page in place of + * the old duplicate item. 
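+ * __ham_move_offpage overwrites the on-page entry with an H_OFFDUP + * record referencing PGNO(dp), logging the replacement first when + * logging is enabled.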
+ */ + __ham_move_offpage(dbc, hcp->page, + (u_int32_t)H_DATAINDEX(hcp->indx), PGNO(dp)); + + ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY); + if ((t_ret = memp_fput(dbp->mpf, dp, DB_MPOOL_DIRTY)) != 0) + ret = t_ret; + hcp->dup_tlen = hcp->dup_off = hcp->dup_len = 0; + } else + (void)__db_free(dbc, dp); + + if (hcs != NULL) + __os_free(hcs, 0); + + return (ret); +} + +/* + * __ham_make_dup + * + * Take a regular dbt and make it into a duplicate item with all the partial + * information set appropriately. If the incoming dbt is a partial, assume + * we are creating a new entry and make sure that we do any initial padding. + * + * PUBLIC: int __ham_make_dup __P((DB_ENV *, + * PUBLIC: const DBT *, DBT *d, void **, u_int32_t *)); + */ +int +__ham_make_dup(dbenv, notdup, duplicate, bufp, sizep) + DB_ENV *dbenv; + const DBT *notdup; + DBT *duplicate; + void **bufp; + u_int32_t *sizep; +{ + db_indx_t tsize, item_size; + int ret; + u_int8_t *p; + + item_size = (db_indx_t)notdup->size; + if (F_ISSET(notdup, DB_DBT_PARTIAL)) + item_size += notdup->doff; + + tsize = DUP_SIZE(item_size); + if ((ret = __ham_init_dbt(dbenv, duplicate, tsize, bufp, sizep)) != 0) + return (ret); + + duplicate->dlen = 0; + duplicate->flags = notdup->flags; + F_SET(duplicate, DB_DBT_PARTIAL); + + p = duplicate->data; + memcpy(p, &item_size, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + if (F_ISSET(notdup, DB_DBT_PARTIAL)) { + memset(p, 0, notdup->doff); + p += notdup->doff; + } + memcpy(p, notdup->data, notdup->size); + p += notdup->size; + memcpy(p, &item_size, sizeof(db_indx_t)); + + duplicate->doff = 0; + duplicate->dlen = notdup->size; + + return (0); +} + +/* + * __ham_check_move -- + * + * Check if we can do whatever we need to on this page. If not, + * then we'll have to move the current element to a new page. + */ +static int +__ham_check_move(dbc, add_len) + DBC *dbc; + u_int32_t add_len; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT k, d; + DB_LSN new_lsn; + PAGE *next_pagep; + db_pgno_t next_pgno; + u_int32_t new_datalen, old_len, rectype; + u_int8_t *hk; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + hk = H_PAIRDATA(hcp->page, hcp->indx); + + /* + * If the item is already off page duplicates or an offpage item, + * then we know we can do whatever we need to do in-place + */ + if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE) + return (0); + + old_len = LEN_HITEM(hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)); + new_datalen = old_len - HKEYDATA_SIZE(0) + add_len; + if (HPAGE_PTYPE(hk) != H_DUPLICATE) + new_datalen += DUP_SIZE(0); + + /* + * We need to add a new page under two conditions: + * 1. The addition makes the total data length cross the BIG + * threshold and the OFFDUP structure won't fit on this page. + * 2. The addition does not make the total data cross the + * threshold, but the new data won't fit on the page. + * If neither of these is true, then we can return. + */ + if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE || + HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->page))) + return (0); + + if (!ISBIG(hcp, new_datalen) && add_len <= P_FREESPACE(hcp->page)) + return (0); + + /* + * If we get here, then we need to move the item to a new page. + * Check if there are more pages in the chain. We now need to + * update new_datalen to include the size of both the key and + * the data that we need to move. + */ + + new_datalen = ISBIG(hcp, new_datalen) ? 
+ HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); + new_datalen += LEN_HITEM(hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx)); + + next_pagep = NULL; + for (next_pgno = NEXT_PGNO(hcp->page); next_pgno != PGNO_INVALID; + next_pgno = NEXT_PGNO(next_pagep)) { + if (next_pagep != NULL && + (ret = memp_fput(dbp->mpf, next_pagep, 0)) != 0) + return (ret); + + if ((ret = memp_fget(dbp->mpf, + &next_pgno, DB_MPOOL_CREATE, &next_pagep)) != 0) + return (ret); + + if (P_FREESPACE(next_pagep) >= new_datalen) + break; + } + + /* No more pages, add one. */ + if (next_pagep == NULL && (ret = __ham_add_ovflpage(dbc, + hcp->page, 0, &next_pagep)) != 0) + return (ret); + + /* Add new page at the end of the chain. */ + if (P_FREESPACE(next_pagep) < new_datalen && (ret = + __ham_add_ovflpage(dbc, next_pagep, 1, &next_pagep)) != 0) { + (void)memp_fput(dbp->mpf, next_pagep, 0); + return (ret); + } + + /* Copy the item to the new page. */ + if (DB_LOGGING(dbc)) { + rectype = PUTPAIR; + k.flags = 0; + d.flags = 0; + if (HPAGE_PTYPE( + H_PAIRKEY(hcp->page, hcp->indx)) == H_OFFPAGE) { + rectype |= PAIR_KEYMASK; + k.data = H_PAIRKEY(hcp->page, hcp->indx); + k.size = HOFFPAGE_SIZE; + } else { + k.data = + HKEYDATA_DATA(H_PAIRKEY(hcp->page, hcp->indx)); + k.size = LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx); + } + + if (HPAGE_PTYPE(hk) == H_OFFPAGE) { + rectype |= PAIR_DATAMASK; + d.data = H_PAIRDATA(hcp->page, hcp->indx); + d.size = HOFFPAGE_SIZE; + } else { + if (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx)) + == H_DUPLICATE) + rectype |= PAIR_DUPMASK; + d.data = + HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + d.size = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + } + + if ((ret = __ham_insdel_log(dbp->dbenv, + dbc->txn, &new_lsn, 0, rectype, + dbp->log_fileid, PGNO(next_pagep), + (u_int32_t)NUM_ENT(next_pagep), &LSN(next_pagep), + &k, &d)) != 0) { + (void)memp_fput(dbp->mpf, next_pagep, 0); + return (ret); + } + + /* Move lsn onto page. */ + LSN(next_pagep) = new_lsn; /* Structure assignment. */ + } + + __ham_copy_item(dbp->pgsize, + hcp->page, H_KEYINDEX(hcp->indx), next_pagep); + __ham_copy_item(dbp->pgsize, + hcp->page, H_DATAINDEX(hcp->indx), next_pagep); + + /* + * We've just manually inserted a key and set of data onto + * next_pagep; however, it's possible that our caller will + * return without further modifying the new page, for instance + * if DB_NODUPDATA is set and our new item is a duplicate duplicate. + * Thus, to be on the safe side, we need to mark the page dirty + * here. [#2996] + * + * Note that __ham_del_pair should dirty the page we're moving + * the items from, so we need only dirty the new page ourselves. + */ + if ((ret = memp_fset(dbp->mpf, next_pagep, DB_MPOOL_DIRTY)) != 0) + goto out; + + /* Update all cursors that used to point to this item. */ + if ((ret = __ham_c_chgpg(dbc, PGNO(hcp->page), H_KEYINDEX(hcp->indx), + PGNO(next_pagep), NUM_ENT(next_pagep) - 2)) != 0) + goto out; + + /* Now delete the pair from the current page. */ + ret = __ham_del_pair(dbc, 0); + + /* + * __ham_del_pair decremented nelem. This is incorrect; we + * manually copied the element elsewhere, so the total number + * of elements hasn't changed. Increment it again. 
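+ * (As with the decrement in __ham_del_pair, nelem is only maintained + * here when we are not using standard locking, since updating it would + * otherwise make the meta-data page a hot spot.)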
+ */ + if (!STD_LOCKING(dbc)) + hcp->hdr->nelem++; + +out: + (void)memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY); + hcp->page = next_pagep; + hcp->pgno = PGNO(hcp->page); + hcp->indx = NUM_ENT(hcp->page) - 2; + F_SET(hcp, H_EXPAND); + F_CLR(hcp, H_DELETED); + + return (ret); +} + +/* + * __ham_move_offpage -- + * Replace an onpage set of duplicates with the OFFDUP structure + * that references the duplicate page. + * + * XXX + * This is really just a special case of __onpage_replace; we should + * probably combine them. + * + * PUBLIC: void __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); + */ +void +__ham_move_offpage(dbc, pagep, ndx, pgno) + DBC *dbc; + PAGE *pagep; + u_int32_t ndx; + db_pgno_t pgno; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT new_dbt; + DBT old_dbt; + HOFFDUP od; + db_indx_t i; + int32_t shrink; + u_int8_t *src; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + od.type = H_OFFDUP; + UMRW_SET(od.unused[0]); + UMRW_SET(od.unused[1]); + UMRW_SET(od.unused[2]); + od.pgno = pgno; + + if (DB_LOGGING(dbc)) { + new_dbt.data = &od; + new_dbt.size = HOFFDUP_SIZE; + old_dbt.data = P_ENTRY(pagep, ndx); + old_dbt.size = LEN_HITEM(pagep, dbp->pgsize, ndx); + (void)__ham_replace_log(dbp->dbenv, + dbc->txn, &LSN(pagep), 0, dbp->log_fileid, + PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1, + &old_dbt, &new_dbt, 0); + } + + shrink = LEN_HITEM(pagep, dbp->pgsize, ndx) - HOFFDUP_SIZE; + + if (shrink != 0) { + /* Copy data. */ + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep)); + HOFFSET(pagep) += shrink; + + /* Update index table. */ + for (i = ndx; i < NUM_ENT(pagep); i++) + pagep->inp[i] += shrink; + } + + /* Now copy the offdup entry onto the page. */ + memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE); +} + +/* + * __ham_dsearch: + * Locate a particular duplicate in a duplicate set. Make sure that + * we exit with the cursor set appropriately. + * + * PUBLIC: void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *)); + */ +void +__ham_dsearch(dbc, dbt, offp, cmpp) + DBC *dbc; + DBT *dbt; + u_int32_t *offp; + int *cmpp; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT cur; + db_indx_t i, len; + int (*func) __P((DB *, const DBT *, const DBT *)); + u_int8_t *data; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + if (dbp->dup_compare == NULL) + func = __bam_defcmp; + else + func = dbp->dup_compare; + + i = F_ISSET(hcp, H_CONTINUE) ? hcp->dup_off: 0; + data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) + i; + hcp->dup_tlen = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + while (i < hcp->dup_tlen) { + memcpy(&len, data, sizeof(db_indx_t)); + data += sizeof(db_indx_t); + cur.data = data; + cur.size = (u_int32_t)len; + *cmpp = func(dbp, dbt, &cur); + if (*cmpp == 0 || (*cmpp < 0 && dbp->dup_compare != NULL)) + break; + i += len + 2 * sizeof(db_indx_t); + data += len + sizeof(db_indx_t); + } + *offp = i; + hcp->dup_off = i; + hcp->dup_len = len; + F_SET(hcp, H_ISDUP); +} + +#ifdef DEBUG +/* + * __ham_cprint -- + * Display the current cursor list. 
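+ * Each active cursor is written to stderr, one per line, as + * + * <dbc>-><internal>: page: <pgno> index: <indx> [ (deleted)]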
+ * + * PUBLIC: int __ham_cprint __P((DB *)); + */ +int +__ham_cprint(dbp) + DB *dbp; +{ + HASH_CURSOR *cp; + DBC *dbc; + + MUTEX_THREAD_LOCK(dbp->dbenv, dbp->mutexp); + for (dbc = TAILQ_FIRST(&dbp->active_queue); + dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { + cp = (HASH_CURSOR *)dbc->internal; + fprintf(stderr, "%#0lx->%#0lx: page: %lu index: %lu", + P_TO_ULONG(dbc), P_TO_ULONG(cp), (u_long)cp->pgno, + (u_long)cp->indx); + if (F_ISSET(cp, H_DELETED)) + fprintf(stderr, " (deleted)"); + fprintf(stderr, "\n"); + } + MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); + + return (0); +} +#endif /* DEBUG */ + +/* + * __ham_dcursor -- + * + * Create an off page duplicate cursor for this cursor. + */ +static int +__ham_dcursor(dbc, pgno, indx) + DBC *dbc; + db_pgno_t pgno; + u_int32_t indx; +{ + DB *dbp; + DBC *dbc_nopd; + HASH_CURSOR *hcp; + BTREE_CURSOR *dcp; + int ret; + + dbp = dbc->dbp; + + if ((ret = __db_c_newopd(dbc, pgno, &dbc_nopd)) != 0) + return (ret); + + dcp = (BTREE_CURSOR *)dbc_nopd->internal; + dcp->pgno = pgno; + dcp->indx = indx; + + if (dbp->dup_compare == NULL) { + /* + * Converting to off-page Recno trees is tricky. The + * record number for the cursor is the index + 1 (to + * convert to 1-based record numbers). + */ + dcp->recno = indx + 1; + } + + /* + * Transfer the deleted flag from the top-level cursor to the + * created one. + */ + hcp = (HASH_CURSOR *)dbc->internal; + if (F_ISSET(hcp, H_DELETED)) { + F_SET(dcp, C_DELETED); + F_CLR(hcp, H_DELETED); + } + + /* Stack the cursors and reset the initial cursor's index. */ + hcp->opd = dbc_nopd; + + return (0); +} diff --git a/bdb/hash/hash_func.c b/bdb/hash/hash_func.c new file mode 100644 index 00000000000..22b4f08ee70 --- /dev/null +++ b/bdb/hash/hash_func.c @@ -0,0 +1,242 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_func.c,v 11.7 2000/08/16 18:26:19 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" + +/* + * __ham_func2 -- + * Phong Vo's linear congruential hash. + * + * PUBLIC: u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); + */ +#define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) + +u_int32_t +__ham_func2(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *e, *k; + u_int32_t h; + u_int8_t c; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + k = key; + e = k + len; + for (h = 0; k != e;) { + c = *k++; + if (!c && k > e) + break; + DCHARHASH(h, c); + } + return (h); +} + +/* + * __ham_func3 -- + * Ozan Yigit's original sdbm hash. + * + * Ugly, but fast. Break the string up into 8 byte units. On the first time + * through the loop get the "leftover bytes" (strlen % 8). On every other + * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this + * saves us 7 cmp & branch instructions. + * + * PUBLIC: u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_func3(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *k; + u_int32_t n, loop; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + if (len == 0) + return (0); + +#define HASHC n = *k++ + 65599 * n + n = 0; + k = key; + + loop = (len + 8 - 1) >> 3; + switch (len & (8 - 1)) { + case 0: + do { + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } + return (n); +} + +/* + * __ham_func4 -- + * Chris Torek's hash function. Although this function performs only + * slightly worse than __ham_func5 on strings, it performs horribly on + * numbers. + * + * PUBLIC: u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_func4(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *k; + u_int32_t h, loop; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + if (len == 0) + return (0); + +#define HASH4a h = (h << 5) - h + *k++; +#define HASH4b h = (h << 5) + h + *k++; +#define HASH4 HASH4b + h = 0; + k = key; + + loop = (len + 8 - 1) >> 3; + switch (len & (8 - 1)) { + case 0: + do { + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } + return (h); +} + +/* + * Fowler/Noll/Vo hash + * + * The basis of the hash algorithm was taken from an idea sent by email to the + * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and + * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com) + * later improved on their algorithm. 
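+ * + * Concretely, the implementation below starts the hash at 0 and, for + * each byte of the key, multiplies the running value by the prime + * 16777619 and then XORs the byte into it.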
+ * + * The magic is in the interesting relationship between the special prime + * 16777619 (2^24 + 403) and 2^32 and 2^8. + * + * This hash produces the fewest collisions of any function that we've seen so + * far, and works well on both numbers and strings. + * + * PUBLIC: u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_func5(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *k, *e; + u_int32_t h; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + k = key; + e = k + len; + for (h = 0; k < e; ++k) { + h *= 16777619; + h ^= *k; + } + return (h); +} + +u_int32_t +__ham_test(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + COMPQUIET(dbp, NULL); + COMPQUIET(len, 0); + return ((u_int32_t)*(char *)key); +} diff --git a/bdb/hash/hash_meta.c b/bdb/hash/hash_meta.c new file mode 100644 index 00000000000..d96a6db3207 --- /dev/null +++ b/bdb/hash/hash_meta.c @@ -0,0 +1,121 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_meta.c,v 11.10 2000/12/21 21:54:35 margo Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" +#include "db_shash.h" +#include "lock.h" +#include "txn.h" + +/* + * Acquire the meta-data page. + * + * PUBLIC: int __ham_get_meta __P((DBC *)); + */ +int +__ham_get_meta(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + HASH *hashp; + DB *dbp; + int ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + hashp = dbp->h_internal; + + if (dbp->dbenv != NULL && + STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER)) { + dbc->lock.pgno = hashp->meta_pgno; + if ((ret = lock_get(dbp->dbenv, dbc->locker, + DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, + &dbc->lock_dbt, DB_LOCK_READ, &hcp->hlock)) != 0) + return (ret); + } + + if ((ret = memp_fget(dbc->dbp->mpf, + &hashp->meta_pgno, DB_MPOOL_CREATE, &(hcp->hdr))) != 0 && + hcp->hlock.off != LOCK_INVALID) { + (void)lock_put(dbc->dbp->dbenv, &hcp->hlock); + hcp->hlock.off = LOCK_INVALID; + } + + return (ret); +} + +/* + * Release the meta-data page. + * + * PUBLIC: int __ham_release_meta __P((DBC *)); + */ +int +__ham_release_meta(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + + hcp = (HASH_CURSOR *)dbc->internal; + + if (hcp->hdr) + (void)memp_fput(dbc->dbp->mpf, hcp->hdr, + F_ISSET(hcp, H_DIRTY) ? DB_MPOOL_DIRTY : 0); + hcp->hdr = NULL; + if (!F_ISSET(dbc, DBC_RECOVER) && + dbc->txn == NULL && hcp->hlock.off != LOCK_INVALID) + (void)lock_put(dbc->dbp->dbenv, &hcp->hlock); + hcp->hlock.off = LOCK_INVALID; + F_CLR(hcp, H_DIRTY); + + return (0); +} + +/* + * Mark the meta-data page dirty. + * + * PUBLIC: int __ham_dirty_meta __P((DBC *)); + */ +int +__ham_dirty_meta(dbc) + DBC *dbc; +{ + DB *dbp; + DB_LOCK _tmp; + HASH *hashp; + HASH_CURSOR *hcp; + int ret; + + dbp = dbc->dbp; + hashp = dbp->h_internal; + hcp = (HASH_CURSOR *)dbc->internal; + + ret = 0; + if (STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER)) { + dbc->lock.pgno = hashp->meta_pgno; + if ((ret = lock_get(dbp->dbenv, dbc->locker, + DB_NONBLOCK(dbc) ? 
DB_LOCK_NOWAIT : 0, + &dbc->lock_dbt, DB_LOCK_WRITE, &_tmp)) == 0) { + ret = lock_put(dbp->dbenv, &hcp->hlock); + hcp->hlock = _tmp; + } + } + + if (ret == 0) + F_SET(hcp, H_DIRTY); + return (ret); +} diff --git a/bdb/hash/hash_method.c b/bdb/hash/hash_method.c new file mode 100644 index 00000000000..f8239993dc5 --- /dev/null +++ b/bdb/hash/hash_method.c @@ -0,0 +1,126 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_method.c,v 11.7 2000/07/04 18:28:23 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" + +static int __ham_set_h_ffactor __P((DB *, u_int32_t)); +static int __ham_set_h_hash + __P((DB *, u_int32_t(*)(DB *, const void *, u_int32_t))); +static int __ham_set_h_nelem __P((DB *, u_int32_t)); + +/* + * __ham_db_create -- + * Hash specific initialization of the DB structure. + * + * PUBLIC: int __ham_db_create __P((DB *)); + */ +int +__ham_db_create(dbp) + DB *dbp; +{ + HASH *hashp; + int ret; + + if ((ret = __os_malloc(dbp->dbenv, + sizeof(HASH), NULL, &dbp->h_internal)) != 0) + return (ret); + + hashp = dbp->h_internal; + + hashp->h_nelem = 0; /* Defaults. */ + hashp->h_ffactor = 0; + hashp->h_hash = NULL; + + dbp->set_h_ffactor = __ham_set_h_ffactor; + dbp->set_h_hash = __ham_set_h_hash; + dbp->set_h_nelem = __ham_set_h_nelem; + + return (0); +} + +/* + * PUBLIC: int __ham_db_close __P((DB *)); + */ +int +__ham_db_close(dbp) + DB *dbp; +{ + if (dbp->h_internal == NULL) + return (0); + __os_free(dbp->h_internal, sizeof(HASH)); + dbp->h_internal = NULL; + return (0); +} + +/* + * __ham_set_h_ffactor -- + * Set the fill factor. + */ +static int +__ham_set_h_ffactor(dbp, h_ffactor) + DB *dbp; + u_int32_t h_ffactor; +{ + HASH *hashp; + + DB_ILLEGAL_AFTER_OPEN(dbp, "set_h_ffactor"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + hashp->h_ffactor = h_ffactor; + return (0); +} + +/* + * __ham_set_h_hash -- + * Set the hash function. + */ +static int +__ham_set_h_hash(dbp, func) + DB *dbp; + u_int32_t (*func) __P((DB *, const void *, u_int32_t)); +{ + HASH *hashp; + + DB_ILLEGAL_AFTER_OPEN(dbp, "set_h_hash"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + hashp->h_hash = func; + return (0); +} + +/* + * __ham_set_h_nelem -- + * Set the table size. + */ +static int +__ham_set_h_nelem(dbp, h_nelem) + DB *dbp; + u_int32_t h_nelem; +{ + HASH *hashp; + + DB_ILLEGAL_AFTER_OPEN(dbp, "set_h_nelem"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + hashp->h_nelem = h_nelem; + return (0); +} diff --git a/bdb/hash/hash_page.c b/bdb/hash/hash_page.c new file mode 100644 index 00000000000..64f38853284 --- /dev/null +++ b/bdb/hash/hash_page.c @@ -0,0 +1,1655 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_page.c,v 11.46 2001/01/11 18:19:51 bostic Exp $"; +#endif /* not lint */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Page manipulation for hashing package. + * + * ROUTINES: + * + * External + * __get_page + * __add_ovflpage + * __overflow_page + * Internal + * open_temp + */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "hash.h" +#include "lock.h" +#include "txn.h" + +/* + * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t next_pgno; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED)) { + __db_err(dbp->dbenv, "Attempt to return a deleted item"); + return (EINVAL); + } + F_CLR(hcp, H_OK | H_NOMORE); + + /* Check if we need to get a page for this cursor. */ + if ((ret = __ham_get_cpage(dbc, mode)) != 0) + return (ret); + +recheck: + /* Check if we are looking for space in which to insert an item. */ + if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID + && hcp->seek_size < P_FREESPACE(hcp->page)) + hcp->seek_found_page = hcp->pgno; + + /* Check for off-page duplicates. */ + if (hcp->indx < NUM_ENT(hcp->page) && + HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { + memcpy(pgnop, + HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), + sizeof(db_pgno_t)); + F_SET(hcp, H_OK); + return (0); + } + + /* Check if we need to go on to the next page. */ + if (F_ISSET(hcp, H_ISDUP)) + /* + * ISDUP is set, and offset is at the beginning of the datum. + * We need to grab the length of the datum, then set the datum + * pointer to be the beginning of the datum. 
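+ * (dup_off is the byte offset of the current element within the + * on-page duplicate set, and the db_indx_t stored at that offset is + * the element's length.)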
+ */ + memcpy(&hcp->dup_len, + HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) + + hcp->dup_off, sizeof(db_indx_t)); + + if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) { + /* Fetch next page. */ + if (NEXT_PGNO(hcp->page) == PGNO_INVALID) { + F_SET(hcp, H_NOMORE); + return (DB_NOTFOUND); + } + next_pgno = NEXT_PGNO(hcp->page); + hcp->indx = 0; + if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0) + return (ret); + goto recheck; + } + + F_SET(hcp, H_OK); + return (0); +} + +/* + * PUBLIC: int __ham_item_reset __P((DBC *)); + */ +int +__ham_item_reset(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + DB *dbp; + int ret; + + ret = 0; + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->page != NULL) + ret = memp_fput(dbp->mpf, hcp->page, 0); + + __ham_item_init(dbc); + return (ret); +} + +/* + * PUBLIC: void __ham_item_init __P((DBC *)); + */ +void +__ham_item_init(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + + hcp = (HASH_CURSOR *)dbc->internal; + /* + * If this cursor still holds any locks, we must + * release them if we are not running with transactions. + */ + if (hcp->lock.off != LOCK_INVALID && dbc->txn == NULL) + (void)lock_put(dbc->dbp->dbenv, &hcp->lock); + + /* + * The following fields must *not* be initialized here + * because they may have meaning across inits. + * hlock, hdr, split_buf, stats + */ + hcp->bucket = BUCKET_INVALID; + hcp->lbucket = BUCKET_INVALID; + hcp->lock.off = LOCK_INVALID; + hcp->lock_mode = DB_LOCK_NG; + hcp->dup_off = 0; + hcp->dup_len = 0; + hcp->dup_tlen = 0; + hcp->seek_size = 0; + hcp->seek_found_page = PGNO_INVALID; + hcp->flags = 0; + + hcp->pgno = PGNO_INVALID; + hcp->indx = NDX_INVALID; + hcp->page = NULL; +} + +/* + * Returns the last item in a bucket. + * + * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item_last(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + HASH_CURSOR *hcp; + int ret; + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_item_reset(dbc)) != 0) + return (ret); + + hcp->bucket = hcp->hdr->max_bucket; + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + F_SET(hcp, H_OK); + return (__ham_item_prev(dbc, mode, pgnop)); +} + +/* + * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item_first(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + HASH_CURSOR *hcp; + int ret; + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_item_reset(dbc)) != 0) + return (ret); + F_SET(hcp, H_OK); + hcp->bucket = 0; + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + return (__ham_item_next(dbc, mode, pgnop)); +} + +/* + * __ham_item_prev -- + * Returns a pointer to key/data pair on a page. In the case of + * bigkeys, just returns the page number and index of the bigkey + * pointer pair. + * + * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item_prev(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t next_pgno; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + /* + * There are 5 cases for backing up in a hash file. + * Case 1: In the middle of a page, no duplicates, just dec the index. + * Case 2: In the middle of a duplicate set, back up one. + * Case 3: At the beginning of a duplicate set, get out of set and + * back up to next key. + * Case 4: At the beginning of a page; go to previous page. + * Case 5: At the beginning of a bucket; go to prev bucket. 
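+ * + * Note that every key/data pair occupies two slots in the page's index + * table, which is why stepping from one pair to the previous (or next) + * one always adjusts hcp->indx by 2.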
+ */ + F_CLR(hcp, H_OK | H_NOMORE | H_DELETED); + + if ((ret = __ham_get_cpage(dbc, mode)) != 0) + return (ret); + + /* + * First handle the duplicates. Either you'll get the key here + * or you'll exit the duplicate set and drop into the code below + * to handle backing up through keys. + */ + if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) { + if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { + memcpy(pgnop, + HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), + sizeof(db_pgno_t)); + F_SET(hcp, H_OK); + return (0); + } + + /* Duplicates are on-page. */ + if (hcp->dup_off != 0) { + memcpy(&hcp->dup_len, HKEYDATA_DATA( + H_PAIRDATA(hcp->page, hcp->indx)) + + hcp->dup_off - sizeof(db_indx_t), + sizeof(db_indx_t)); + hcp->dup_off -= + DUP_SIZE(hcp->dup_len); + return (__ham_item(dbc, mode, pgnop)); + } + } + + /* + * If we get here, we are not in a duplicate set, and just need + * to back up the cursor. There are still three cases: + * midpage, beginning of page, beginning of bucket. + */ + + if (F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else + /* + * We are no longer in a dup set; flag this so the dup code + * will reinitialize should we stumble upon another one. + */ + F_CLR(hcp, H_ISDUP); + + if (hcp->indx == 0) { /* Beginning of page. */ + hcp->pgno = PREV_PGNO(hcp->page); + if (hcp->pgno == PGNO_INVALID) { + /* Beginning of bucket. */ + F_SET(hcp, H_NOMORE); + return (DB_NOTFOUND); + } else if ((ret = + __ham_next_cpage(dbc, hcp->pgno, 0)) != 0) + return (ret); + else + hcp->indx = NUM_ENT(hcp->page); + } + + /* + * Either we've got the cursor set up to be decremented, or we + * have to find the end of a bucket. + */ + if (hcp->indx == NDX_INVALID) { + DB_ASSERT(hcp->page != NULL); + + hcp->indx = NUM_ENT(hcp->page); + for (next_pgno = NEXT_PGNO(hcp->page); + next_pgno != PGNO_INVALID; + next_pgno = NEXT_PGNO(hcp->page)) { + if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0) + return (ret); + hcp->indx = NUM_ENT(hcp->page); + } + + if (hcp->indx == 0) { + /* Bucket was empty. */ + F_SET(hcp, H_NOMORE); + return (DB_NOTFOUND); + } + } + + hcp->indx -= 2; + + return (__ham_item(dbc, mode, pgnop)); +} + +/* + * Sets the cursor to the next key/data pair on a page. + * + * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item_next(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + HASH_CURSOR *hcp; + int ret; + + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_cpage(dbc, mode)) != 0) + return (ret); + + /* + * Deleted on-page duplicates are a weird case. If we delete the last + * one, then our cursor is at the very end of a duplicate set and + * we actually need to go on to the next key. 
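+ * (The dup_tlen == dup_off comparison below detects that end-of-set + * case; the other H_DELETED branches can generally leave the index + * alone because the delete left the cursor positioned on the entry + * that now occupies the old slot.)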
+ */ + if (F_ISSET(hcp, H_DELETED)) { + if (hcp->indx != NDX_INVALID && + F_ISSET(hcp, H_ISDUP) && + HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) + == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) { + if (F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else { + F_CLR(hcp, H_ISDUP); + hcp->indx += 2; + } + } else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else if (F_ISSET(hcp, H_ISDUP) && + F_ISSET(hcp, H_NEXT_NODUP)) { + F_CLR(hcp, H_ISDUP); + hcp->indx += 2; + } + F_CLR(hcp, H_DELETED); + } else if (hcp->indx == NDX_INVALID) { + hcp->indx = 0; + F_CLR(hcp, H_ISDUP); + } else if (F_ISSET(hcp, H_NEXT_NODUP)) { + hcp->indx += 2; + F_CLR(hcp, H_ISDUP); + } else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) { + if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >= + hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } + hcp->dup_off += DUP_SIZE(hcp->dup_len); + if (hcp->dup_off >= hcp->dup_tlen) { + F_CLR(hcp, H_ISDUP); + hcp->indx += 2; + } + } else if (F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else { + hcp->indx += 2; + F_CLR(hcp, H_ISDUP); + } + + return (__ham_item(dbc, mode, pgnop)); +} + +/* + * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int)); + * + * This is a little bit sleazy in that we're overloading the meaning + * of the H_OFFPAGE type here. When we recover deletes, we have the + * entire entry instead of having only the DBT, so we'll pass type + * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing + * an H_KEYDATA around it. + */ +void +__ham_putitem(p, dbt, type) + PAGE *p; + const DBT *dbt; + int type; +{ + u_int16_t n, off; + + n = NUM_ENT(p); + + /* Put the item element on the page. */ + if (type == H_OFFPAGE) { + off = HOFFSET(p) - dbt->size; + HOFFSET(p) = p->inp[n] = off; + memcpy(P_ENTRY(p, n), dbt->data, dbt->size); + } else { + off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size); + HOFFSET(p) = p->inp[n] = off; + PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type); + } + + /* Adjust page info. */ + NUM_ENT(p) += 1; +} + +/* + * PUBLIC: void __ham_reputpair + * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *)); + * + * This is a special case to restore a key/data pair to its original + * location during recovery. We are guaranteed that the pair fits + * on the page and is not the last pair on the page (because if it's + * the last pair, the normal insert works). + */ +void +__ham_reputpair(p, psize, ndx, key, data) + PAGE *p; + u_int32_t psize, ndx; + const DBT *key, *data; +{ + db_indx_t i, movebytes, newbytes; + u_int8_t *from; + + /* First shuffle the existing items up on the page. */ + movebytes = + (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p); + newbytes = key->size + data->size; + from = (u_int8_t *)p + HOFFSET(p); + memmove(from - newbytes, from, movebytes); + + /* + * Adjust the indices and move them up 2 spaces. Note that we + * have to check the exit condition inside the loop just in case + * we are dealing with index 0 (db_indx_t's are unsigned). + */ + for (i = NUM_ENT(p) - 1; ; i-- ) { + p->inp[i + 2] = p->inp[i] - newbytes; + if (i == H_KEYINDEX(ndx)) + break; + } + + /* Put the key and data on the page. */ + p->inp[H_KEYINDEX(ndx)] = + (ndx == 0 ? 
psize : p->inp[H_DATAINDEX(ndx - 2)]) - key->size; + p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size; + memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size); + memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size); + + /* Adjust page info. */ + HOFFSET(p) -= newbytes; + NUM_ENT(p) += 2; +} + +/* + * PUBLIC: int __ham_del_pair __P((DBC *, int)); + */ +int +__ham_del_pair(dbc, reclaim_page) + DBC *dbc; + int reclaim_page; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT data_dbt, key_dbt; + DB_ENV *dbenv; + DB_LSN new_lsn, *n_lsn, tmp_lsn; + PAGE *n_pagep, *nn_pagep, *p, *p_pagep; + db_indx_t ndx; + db_pgno_t chg_pgno, pgno, tmp_pgno; + int ret, t_ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + dbenv = dbp->dbenv; + ndx = hcp->indx; + + n_pagep = p_pagep = nn_pagep = NULL; + + if (hcp->page == NULL && (ret = memp_fget(dbp->mpf, + &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) + return (ret); + p = hcp->page; + + /* + * We optimize for the normal case which is when neither the key nor + * the data are large. In this case, we write a single log record + * and do the delete. If either is large, we'll call __big_delete + * to remove the big item and then update the page to remove the + * entry referring to the big item. + */ + ret = 0; + if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) { + memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))), + sizeof(db_pgno_t)); + ret = __db_doff(dbc, pgno); + } + + if (ret == 0) + switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) { + case H_OFFPAGE: + memcpy(&pgno, + HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))), + sizeof(db_pgno_t)); + ret = __db_doff(dbc, pgno); + break; + case H_OFFDUP: + case H_DUPLICATE: + /* + * If we delete a pair that is/was a duplicate, then + * we had better clear the flag so that we update the + * cursor appropriately. + */ + F_CLR(hcp, H_ISDUP); + break; + } + + if (ret) + return (ret); + + /* Now log the delete off this page. */ + if (DB_LOGGING(dbc)) { + key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx)); + key_dbt.size = LEN_HITEM(p, dbp->pgsize, H_KEYINDEX(ndx)); + data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx)); + data_dbt.size = LEN_HITEM(p, dbp->pgsize, H_DATAINDEX(ndx)); + + if ((ret = __ham_insdel_log(dbenv, + dbc->txn, &new_lsn, 0, DELPAIR, + dbp->log_fileid, PGNO(p), (u_int32_t)ndx, + &LSN(p), &key_dbt, &data_dbt)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(p) = new_lsn; + } + + /* Do the delete. */ + __ham_dpair(dbp, p, ndx); + + /* + * Mark item deleted so that we don't try to return it, and + * so that we update the cursor correctly on the next call + * to next. + */ + F_SET(hcp, H_DELETED); + F_CLR(hcp, H_OK); + + /* + * Update cursors that are on the page where the delete happend. + */ + if ((ret = __ham_c_update(dbc, 0, 0, 0)) != 0) + return (ret); + + /* + * If we are locking, we will not maintain this, because it is + * a hot spot. + * + * XXX + * Perhaps we can retain incremental numbers and apply them later. + */ + if (!STD_LOCKING(dbc)) + --hcp->hdr->nelem; + + /* + * If we need to reclaim the page, then check if the page is empty. + * There are two cases. If it's empty and it's not the first page + * in the bucket (i.e., the bucket page) then we can simply remove + * it. If it is the first chain in the bucket, then we need to copy + * the second page into it and remove the second page. + * If its the only page in the bucket we leave it alone. 
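+ * (The head page itself is never released: bucket pages are addressed + * directly by bucket number -- see BUCKET_TO_PAGE -- so the head of the + * chain has to stay put, which is why the second page is copied over it + * instead.)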
+ */ + if (!reclaim_page || + NUM_ENT(p) != 0 || + (PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID)) + return (memp_fset(dbp->mpf, p, DB_MPOOL_DIRTY)); + + if (PREV_PGNO(p) == PGNO_INVALID) { + /* + * First page in chain is empty and we know that there + * are more pages in the chain. + */ + if ((ret = + memp_fget(dbp->mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0) + return (ret); + + if (NEXT_PGNO(n_pagep) != PGNO_INVALID && + (ret = memp_fget(dbp->mpf, &NEXT_PGNO(n_pagep), 0, + &nn_pagep)) != 0) + goto err; + + if (DB_LOGGING(dbc)) { + key_dbt.data = n_pagep; + key_dbt.size = dbp->pgsize; + if ((ret = __ham_copypage_log(dbenv, + dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p), + &LSN(p), PGNO(n_pagep), &LSN(n_pagep), + NEXT_PGNO(n_pagep), + nn_pagep == NULL ? NULL : &LSN(nn_pagep), + &key_dbt)) != 0) + goto err; + + /* Move lsn onto page. */ + LSN(p) = new_lsn; /* Structure assignment. */ + LSN(n_pagep) = new_lsn; + if (NEXT_PGNO(n_pagep) != PGNO_INVALID) + LSN(nn_pagep) = new_lsn; + } + if (nn_pagep != NULL) { + PREV_PGNO(nn_pagep) = PGNO(p); + if ((ret = memp_fput(dbp->mpf, + nn_pagep, DB_MPOOL_DIRTY)) != 0) { + nn_pagep = NULL; + goto err; + } + } + + tmp_pgno = PGNO(p); + tmp_lsn = LSN(p); + memcpy(p, n_pagep, dbp->pgsize); + PGNO(p) = tmp_pgno; + LSN(p) = tmp_lsn; + PREV_PGNO(p) = PGNO_INVALID; + + /* + * Update cursors to reflect the fact that records + * on the second page have moved to the first page. + */ + if ((ret = __ham_c_chgpg(dbc, + PGNO(n_pagep), NDX_INVALID, PGNO(p), NDX_INVALID)) != 0) + return (ret); + + /* + * Update the cursor to reflect its new position. + */ + hcp->indx = 0; + hcp->pgno = PGNO(p); + if ((ret = memp_fset(dbp->mpf, p, DB_MPOOL_DIRTY)) != 0 || + (ret = __db_free(dbc, n_pagep)) != 0) + return (ret); + } else { + if ((ret = + memp_fget(dbp->mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0) + goto err; + + if (NEXT_PGNO(p) != PGNO_INVALID) { + if ((ret = memp_fget(dbp->mpf, + &NEXT_PGNO(p), 0, &n_pagep)) != 0) + goto err; + n_lsn = &LSN(n_pagep); + } else { + n_pagep = NULL; + n_lsn = NULL; + } + + NEXT_PGNO(p_pagep) = NEXT_PGNO(p); + if (n_pagep != NULL) + PREV_PGNO(n_pagep) = PGNO(p_pagep); + + if (DB_LOGGING(dbc)) { + if ((ret = __ham_newpage_log(dbenv, + dbc->txn, &new_lsn, 0, DELOVFL, + dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep), + PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) + goto err; + + /* Move lsn onto page. */ + LSN(p_pagep) = new_lsn; /* Structure assignment. */ + if (n_pagep) + LSN(n_pagep) = new_lsn; + LSN(p) = new_lsn; + } + if (NEXT_PGNO(p) == PGNO_INVALID) { + /* + * There is no next page; put the cursor on the + * previous page as if we'd deleted the last item + * on that page; index greater than number of + * valid entries and H_DELETED set. + */ + hcp->pgno = PGNO(p_pagep); + hcp->indx = NUM_ENT(p_pagep); + F_SET(hcp, H_DELETED); + } else { + hcp->pgno = NEXT_PGNO(p); + hcp->indx = 0; + } + + /* + * Since we are about to delete the cursor page and we have + * just moved the cursor, we need to make sure that the + * old page pointer isn't left hanging around in the cursor. + */ + hcp->page = NULL; + chg_pgno = PGNO(p); + ret = __db_free(dbc, p); + if ((t_ret = memp_fput(dbp->mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 + && ret == 0) + ret = t_ret; + if (n_pagep != NULL && (t_ret = memp_fput(dbp->mpf, + n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + ret = __ham_c_chgpg(dbc, + chg_pgno, 0, hcp->pgno, hcp->indx); + } + return (ret); + +err: /* Clean up any pages. 
*/ + if (n_pagep != NULL) + (void)memp_fput(dbp->mpf, n_pagep, 0); + if (nn_pagep != NULL) + (void)memp_fput(dbp->mpf, nn_pagep, 0); + if (p_pagep != NULL) + (void)memp_fput(dbp->mpf, p_pagep, 0); + return (ret); +} + +/* + * __ham_replpair -- + * Given the key data indicated by the cursor, replace part/all of it + * according to the fields in the dbt. + * + * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t)); + */ +int +__ham_replpair(dbc, dbt, make_dup) + DBC *dbc; + DBT *dbt; + u_int32_t make_dup; +{ + DB *dbp; + HASH_CURSOR *hcp; + DBT old_dbt, tdata, tmp; + DB_LSN new_lsn; + int32_t change; /* XXX: Possible overflow. */ + u_int32_t dup, len, memsize; + int is_big, ret, type; + u_int8_t *beg, *dest, *end, *hk, *src; + void *memp; + + /* + * Big item replacements are handled in generic code. + * Items that fit on the current page fall into 4 classes. + * 1. On-page element, same size + * 2. On-page element, new is bigger (fits) + * 3. On-page element, new is bigger (does not fit) + * 4. On-page element, old is bigger + * Numbers 1, 2, and 4 are essentially the same (and should + * be the common case). We handle case 3 as a delete and + * add. + */ + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + /* + * We need to compute the number of bytes that we are adding or + * removing from the entry. Normally, we can simply substract + * the number of bytes we are replacing (dbt->dlen) from the + * number of bytes we are inserting (dbt->size). However, if + * we are doing a partial put off the end of a record, then this + * formula doesn't work, because we are essentially adding + * new bytes. + */ + change = dbt->size - dbt->dlen; + + hk = H_PAIRDATA(hcp->page, hcp->indx); + is_big = HPAGE_PTYPE(hk) == H_OFFPAGE; + + if (is_big) + memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + else + len = LEN_HKEYDATA(hcp->page, + dbp->pgsize, H_DATAINDEX(hcp->indx)); + + if (dbt->doff + dbt->dlen > len) + change += dbt->doff + dbt->dlen - len; + + if (change > (int32_t)P_FREESPACE(hcp->page) || is_big) { + /* + * Case 3 -- two subcases. + * A. This is not really a partial operation, but an overwrite. + * Simple del and add works. + * B. This is a partial and we need to construct the data that + * we are really inserting (yuck). + * In both cases, we need to grab the key off the page (in + * some cases we could do this outside of this routine; for + * cleanliness we do it here. If you happen to be on a big + * key, this could be a performance hit). + */ + memset(&tmp, 0, sizeof(tmp)); + if ((ret = + __db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx), + &tmp, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) + return (ret); + + /* Preserve duplicate info. */ + dup = F_ISSET(hcp, H_ISDUP); + if (dbt->doff == 0 && dbt->dlen == len) { + ret = __ham_del_pair(dbc, 0); + if (ret == 0) + ret = __ham_add_el(dbc, + &tmp, dbt, dup ? H_DUPLICATE : H_KEYDATA); + } else { /* Case B */ + type = HPAGE_PTYPE(hk) != H_OFFPAGE ? + HPAGE_PTYPE(hk) : H_KEYDATA; + memset(&tdata, 0, sizeof(tdata)); + memp = NULL; + memsize = 0; + if ((ret = __db_ret(dbp, hcp->page, + H_DATAINDEX(hcp->indx), &tdata, &memp, &memsize)) + != 0) + goto err; + + /* Now we can delete the item. */ + if ((ret = __ham_del_pair(dbc, 0)) != 0) { + __os_free(memp, memsize); + goto err; + } + + /* Now shift old data around to make room for new. 
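+ *
+ * [Editorial sketch -- not part of the original source; the helper
+ * and its name are hypothetical.]  The realloc/memmove/memcpy
+ * sequence below amounts to applying a partial put to a plain byte
+ * buffer: replace dlen bytes at offset doff with size new bytes,
+ * growing the buffer (and zero-filling any gap) when the record
+ * gets longer.  In isolation, using <errno.h>, <stdlib.h> and
+ * <string.h> where the real code uses __os_realloc and DBTs:
+ *
+ *     static int
+ *     apply_partial(u_int8_t **bufp, u_int32_t *reclenp,
+ *         u_int32_t doff, u_int32_t dlen,
+ *         const u_int8_t *newd, u_int32_t size)
+ *     {
+ *             u_int8_t *buf = *bufp;
+ *             u_int32_t reclen = *reclenp, tail;
+ *             int32_t change = (int32_t)size - (int32_t)dlen;
+ *
+ *             if (doff + dlen > reclen)
+ *                     change += doff + dlen - reclen;
+ *             if (change > 0) {
+ *                     if ((buf = realloc(buf,
+ *                         reclen + change)) == NULL)
+ *                             return (ENOMEM);
+ *                     memset(buf + reclen, 0, change);
+ *             }
+ *             tail = doff + dlen < reclen ?
+ *                 reclen - doff - dlen : 0;
+ *             memmove(buf + doff + size, buf + doff + dlen, tail);
+ *             memcpy(buf + doff, newd, size);
+ *             *bufp = buf;
+ *             *reclenp = reclen + change;
+ *             return (0);
+ *     }
+ *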
*/ + if (change > 0) { + if ((ret = __os_realloc(dbp->dbenv, + tdata.size + change, + NULL, &tdata.data)) != 0) + return (ret); + memp = tdata.data; + memsize = tdata.size + change; + memset((u_int8_t *)tdata.data + tdata.size, + 0, change); + } + end = (u_int8_t *)tdata.data + tdata.size; + + src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen; + if (src < end && tdata.size > dbt->doff + dbt->dlen) { + len = tdata.size - dbt->doff - dbt->dlen; + dest = src + change; + memmove(dest, src, len); + } + memcpy((u_int8_t *)tdata.data + dbt->doff, + dbt->data, dbt->size); + tdata.size += change; + + /* Now add the pair. */ + ret = __ham_add_el(dbc, &tmp, &tdata, type); + __os_free(memp, memsize); + } + F_SET(hcp, dup); +err: return (ret); + } + + /* + * Set up pointer into existing data. Do it before the log + * message so we can use it inside of the log setup. + */ + beg = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + beg += dbt->doff; + + /* + * If we are going to have to move bytes at all, figure out + * all the parameters here. Then log the call before moving + * anything around. + */ + if (DB_LOGGING(dbc)) { + old_dbt.data = beg; + old_dbt.size = dbt->dlen; + if ((ret = __ham_replace_log(dbp->dbenv, + dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->page), + (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page), + (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0) + return (ret); + + LSN(hcp->page) = new_lsn; /* Structure assignment. */ + } + + __ham_onpage_replace(hcp->page, dbp->pgsize, + (u_int32_t)H_DATAINDEX(hcp->indx), (int32_t)dbt->doff, change, dbt); + + return (0); +} + +/* + * Replace data on a page with new data, possibly growing or shrinking what's + * there. This is called on two different occasions. On one (from replpair) + * we are interested in changing only the data. On the other (from recovery) + * we are replacing the entire data (header and all) with a new element. In + * the latter case, the off argument is negative. + * pagep: the page that we're changing + * ndx: page index of the element that is growing/shrinking. + * off: Offset at which we are beginning the replacement. + * change: the number of bytes (+ or -) that the element is growing/shrinking. + * dbt: the new data that gets written at beg. + * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, + * PUBLIC: int32_t, DBT *)); + */ +void +__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt) + PAGE *pagep; + size_t pgsize; + u_int32_t ndx; + int32_t off; + int32_t change; + DBT *dbt; +{ + db_indx_t i; + int32_t len; + u_int8_t *src, *dest; + int zero_me; + + if (change != 0) { + zero_me = 0; + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + if (off < 0) + len = pagep->inp[ndx] - HOFFSET(pagep); + else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) { + len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + + LEN_HKEYDATA(pagep, pgsize, ndx) - src; + zero_me = 1; + } else + len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src; + dest = src - change; + memmove(dest, src, len); + if (zero_me) + memset(dest + len, 0, change); + + /* Now update the indices. 
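+ *
+ * [Editorial illustration with hypothetical numbers.]  Hash items
+ * are stored from the end of the page toward the front, so item ndx
+ * and every later item start inside the block that was just moved.
+ * For example, on a 256-byte page with two items,
+ * inp[] = { 200, 150 } and HOFFSET = 150; growing item 1 by
+ * change = 16 bytes slides its data toward the front, giving
+ * inp[] = { 200, 134 } and HOFFSET = 134.  The loop below does
+ * exactly that: entries ndx and up, and HOFFSET itself, drop by
+ * `change' (and rise by the same amount when the item shrank).
+ *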
*/ + for (i = ndx; i < NUM_ENT(pagep); i++) + pagep->inp[i] -= change; + HOFFSET(pagep) -= change; + } + if (off >= 0) + memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off, + dbt->data, dbt->size); + else + memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size); +} + +/* + * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); + */ +int +__ham_split_page(dbc, obucket, nbucket) + DBC *dbc; + u_int32_t obucket, nbucket; +{ + DB *dbp; + DBC **carray; + HASH_CURSOR *hcp, *cp; + DBT key, page_dbt; + DB_ENV *dbenv; + DB_LSN new_lsn; + PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; + db_indx_t n; + db_pgno_t bucket_pgno, npgno, next_pgno; + u_int32_t big_len, len; + int found, i, ret, t_ret; + void *big_buf; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + dbenv = dbp->dbenv; + temp_pagep = old_pagep = new_pagep = NULL; + + if ((ret = __ham_get_clist(dbp, obucket, NDX_INVALID, &carray)) != 0) + return (ret); + + bucket_pgno = BUCKET_TO_PAGE(hcp, obucket); + if ((ret = memp_fget(dbp->mpf, + &bucket_pgno, DB_MPOOL_CREATE, &old_pagep)) != 0) + goto err; + + /* Properly initialize the new bucket page. */ + npgno = BUCKET_TO_PAGE(hcp, nbucket); + if ((ret = memp_fget(dbp->mpf, + &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0) + goto err; + P_INIT(new_pagep, + dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + + temp_pagep = hcp->split_buf; + memcpy(temp_pagep, old_pagep, dbp->pgsize); + + if (DB_LOGGING(dbc)) { + page_dbt.size = dbp->pgsize; + page_dbt.data = old_pagep; + if ((ret = __ham_splitdata_log(dbenv, + dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD, + PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0) + goto err; + } + + P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID, + PGNO_INVALID, 0, P_HASH); + + if (DB_LOGGING(dbc)) + LSN(old_pagep) = new_lsn; /* Structure assignment. */ + + big_len = 0; + big_buf = NULL; + key.flags = 0; + while (temp_pagep != NULL) { + for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) { + if ((ret = + __db_ret(dbp, temp_pagep, H_KEYINDEX(n), + &key, &big_buf, &big_len)) != 0) + goto err; + + if (__ham_call_hash(dbc, key.data, key.size) + == obucket) + pp = &old_pagep; + else + pp = &new_pagep; + + /* + * Figure out how many bytes we need on the new + * page to store the key/data pair. + */ + + len = LEN_HITEM(temp_pagep, dbp->pgsize, + H_DATAINDEX(n)) + + LEN_HITEM(temp_pagep, dbp->pgsize, + H_KEYINDEX(n)) + + 2 * sizeof(db_indx_t); + + if (P_FREESPACE(*pp) < len) { + if (DB_LOGGING(dbc)) { + page_dbt.size = dbp->pgsize; + page_dbt.data = *pp; + if ((ret = __ham_splitdata_log( + dbenv, dbc->txn, + &new_lsn, 0, dbp->log_fileid, + SPLITNEW, PGNO(*pp), &page_dbt, + &LSN(*pp))) != 0) + goto err; + LSN(*pp) = new_lsn; + } + if ((ret = + __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0) + goto err; + } + + /* Check if we need to update a cursor. 
*/ + if (carray != NULL) { + found = 0; + for (i = 0; carray[i] != NULL; i++) { + cp = + (HASH_CURSOR *)carray[i]->internal; + if (cp->pgno == PGNO(temp_pagep) + && cp->indx == n) { + cp->pgno = PGNO(*pp); + cp->indx = NUM_ENT(*pp); + found = 1; + } + } + if (found && DB_LOGGING(dbc) + && IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = + __ham_chgpg_log(dbp->dbenv, + dbc->txn, &new_lsn, 0, + dbp->log_fileid, + DB_HAM_SPLIT, PGNO(temp_pagep), + PGNO(*pp), n, NUM_ENT(*pp))) != 0) + goto err; + } + } + __ham_copy_item(dbp->pgsize, + temp_pagep, H_KEYINDEX(n), *pp); + __ham_copy_item(dbp->pgsize, + temp_pagep, H_DATAINDEX(n), *pp); + } + next_pgno = NEXT_PGNO(temp_pagep); + + /* Clear temp_page; if it's a link overflow page, free it. */ + if (PGNO(temp_pagep) != bucket_pgno && (ret = + __db_free(dbc, temp_pagep)) != 0) { + temp_pagep = NULL; + goto err; + } + + if (next_pgno == PGNO_INVALID) + temp_pagep = NULL; + else if ((ret = memp_fget(dbp->mpf, + &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0) + goto err; + + if (temp_pagep != NULL && DB_LOGGING(dbc)) { + page_dbt.size = dbp->pgsize; + page_dbt.data = temp_pagep; + if ((ret = __ham_splitdata_log(dbenv, + dbc->txn, &new_lsn, 0, dbp->log_fileid, + SPLITOLD, PGNO(temp_pagep), + &page_dbt, &LSN(temp_pagep))) != 0) + goto err; + LSN(temp_pagep) = new_lsn; + } + } + if (big_buf != NULL) + __os_free(big_buf, big_len); + + /* + * If the original bucket spanned multiple pages, then we've got + * a pointer to a page that used to be on the bucket chain. It + * should be deleted. + */ + if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno && + (ret = __db_free(dbc, temp_pagep)) != 0) { + temp_pagep = NULL; + goto err; + } + + /* + * Write new buckets out. + */ + if (DB_LOGGING(dbc)) { + page_dbt.size = dbp->pgsize; + page_dbt.data = old_pagep; + if ((ret = __ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0, + dbp->log_fileid, SPLITNEW, PGNO(old_pagep), &page_dbt, + &LSN(old_pagep))) != 0) + goto err; + LSN(old_pagep) = new_lsn; + + page_dbt.data = new_pagep; + if ((ret = __ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0, + dbp->log_fileid, SPLITNEW, PGNO(new_pagep), &page_dbt, + &LSN(new_pagep))) != 0) + goto err; + LSN(new_pagep) = new_lsn; + } + ret = memp_fput(dbp->mpf, old_pagep, DB_MPOOL_DIRTY); + if ((t_ret = memp_fput(dbp->mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 + && ret == 0) + ret = t_ret; + + if (0) { +err: if (old_pagep != NULL) + (void)memp_fput(dbp->mpf, old_pagep, DB_MPOOL_DIRTY); + if (new_pagep != NULL) + (void)memp_fput(dbp->mpf, new_pagep, DB_MPOOL_DIRTY); + if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno) + (void)memp_fput(dbp->mpf, temp_pagep, DB_MPOOL_DIRTY); + } + if (carray != NULL) /* We never knew its size. */ + __os_free(carray, 0); + return (ret); +} + +/* + * Add the given pair to the page. The page in question may already be + * held (i.e. it was already gotten). If it is, then the page is passed + * in via the pagep parameter. On return, pagep will contain the page + * to which we just added something. This allows us to link overflow + * pages and return the new page having correctly put the last page. 
+ * + * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int)); + */ +int +__ham_add_el(dbc, key, val, type) + DBC *dbc; + const DBT *key, *val; + int type; +{ + DB *dbp; + HASH_CURSOR *hcp; + const DBT *pkey, *pdata; + DBT key_dbt, data_dbt; + DB_LSN new_lsn; + HOFFPAGE doff, koff; + db_pgno_t next_pgno, pgno; + u_int32_t data_size, key_size, pairsize, rectype; + int do_expand, is_keybig, is_databig, ret; + int key_type, data_type; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + do_expand = 0; + + pgno = hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page : + hcp->pgno; + if (hcp->page == NULL && (ret = memp_fget(dbp->mpf, &pgno, + DB_MPOOL_CREATE, &hcp->page)) != 0) + return (ret); + + key_size = HKEYDATA_PSIZE(key->size); + data_size = HKEYDATA_PSIZE(val->size); + is_keybig = ISBIG(hcp, key->size); + is_databig = ISBIG(hcp, val->size); + if (is_keybig) + key_size = HOFFPAGE_PSIZE; + if (is_databig) + data_size = HOFFPAGE_PSIZE; + + pairsize = key_size + data_size; + + /* Advance to first page in chain with room for item. */ + while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) { + /* + * This may not be the end of the chain, but the pair may fit + * anyway. Check if it's a bigpair that fits or a regular + * pair that fits. + */ + if (P_FREESPACE(hcp->page) >= pairsize) + break; + next_pgno = NEXT_PGNO(hcp->page); + if ((ret = + __ham_next_cpage(dbc, next_pgno, 0)) != 0) + return (ret); + } + + /* + * Check if we need to allocate a new page. + */ + if (P_FREESPACE(hcp->page) < pairsize) { + do_expand = 1; + if ((ret = __ham_add_ovflpage(dbc, + (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0) + return (ret); + hcp->pgno = PGNO(hcp->page); + } + + /* + * Update cursor. + */ + hcp->indx = NUM_ENT(hcp->page); + F_CLR(hcp, H_DELETED); + if (is_keybig) { + koff.type = H_OFFPAGE; + UMRW_SET(koff.unused[0]); + UMRW_SET(koff.unused[1]); + UMRW_SET(koff.unused[2]); + if ((ret = __db_poff(dbc, key, &koff.pgno)) != 0) + return (ret); + koff.tlen = key->size; + key_dbt.data = &koff; + key_dbt.size = sizeof(koff); + pkey = &key_dbt; + key_type = H_OFFPAGE; + } else { + pkey = key; + key_type = H_KEYDATA; + } + + if (is_databig) { + doff.type = H_OFFPAGE; + UMRW_SET(doff.unused[0]); + UMRW_SET(doff.unused[1]); + UMRW_SET(doff.unused[2]); + if ((ret = __db_poff(dbc, val, &doff.pgno)) != 0) + return (ret); + doff.tlen = val->size; + data_dbt.data = &doff; + data_dbt.size = sizeof(doff); + pdata = &data_dbt; + data_type = H_OFFPAGE; + } else { + pdata = val; + data_type = type; + } + + if (DB_LOGGING(dbc)) { + rectype = PUTPAIR; + if (is_databig) + rectype |= PAIR_DATAMASK; + if (is_keybig) + rectype |= PAIR_KEYMASK; + if (type == H_DUPLICATE) + rectype |= PAIR_DUPMASK; + + if ((ret = __ham_insdel_log(dbp->dbenv, dbc->txn, &new_lsn, 0, + rectype, dbp->log_fileid, PGNO(hcp->page), + (u_int32_t)NUM_ENT(hcp->page), &LSN(hcp->page), pkey, + pdata)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(hcp->page) = new_lsn; /* Structure assignment. */ + } + + __ham_putitem(hcp->page, pkey, key_type); + __ham_putitem(hcp->page, pdata, data_type); + + /* + * For splits, we are going to update item_info's page number + * field, so that we can easily return to the same page the + * next time we come in here. For other operations, this shouldn't + * matter, since odds are this is the last thing that happens before + * we return to the user program. + */ + hcp->pgno = PGNO(hcp->page); + + /* + * XXX + * Maybe keep incremental numbers here. 
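+ *
+ * [Editorial note; needs_expand() is a hypothetical name.]  The
+ * check just below decides whether this bucket should be split at
+ * the next opportunity: either an overflow page had to be chained
+ * to fit the pair, or the page now holds more pairs than the
+ * configured fill factor.  As a predicate:
+ *
+ *     static int
+ *     needs_expand(int added_ovfl,
+ *         u_int32_t npairs, u_int32_t ffactor)
+ *     {
+ *             return (added_ovfl ||
+ *                 (ffactor != 0 && npairs > ffactor));
+ *     }
+ *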
+ */ + if (!STD_LOCKING(dbc)) + hcp->hdr->nelem++; + + if (do_expand || (hcp->hdr->ffactor != 0 && + (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor)) + F_SET(hcp, H_EXPAND); + return (0); +} + +/* + * Special __putitem call used in splitting -- copies one entry to + * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, + * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we + * do not need to do any logging here. + * + * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *)); + */ +void +__ham_copy_item(pgsize, src_page, src_ndx, dest_page) + size_t pgsize; + PAGE *src_page; + u_int32_t src_ndx; + PAGE *dest_page; +{ + u_int32_t len; + void *src, *dest; + + /* + * Copy the key and data entries onto this new page. + */ + src = P_ENTRY(src_page, src_ndx); + + /* Set up space on dest. */ + len = LEN_HITEM(src_page, pgsize, src_ndx); + HOFFSET(dest_page) -= len; + dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page); + dest = P_ENTRY(dest_page, NUM_ENT(dest_page)); + NUM_ENT(dest_page)++; + + memcpy(dest, src, len); +} + +/* + * + * Returns: + * pointer on success + * NULL on error + * + * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); + */ +int +__ham_add_ovflpage(dbc, pagep, release, pp) + DBC *dbc; + PAGE *pagep; + int release; + PAGE **pp; +{ + DB *dbp; + HASH_CURSOR *hcp; + DB_LSN new_lsn; + PAGE *new_pagep; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0) + return (ret); + + if (DB_LOGGING(dbc)) { + if ((ret = __ham_newpage_log(dbp->dbenv, dbc->txn, &new_lsn, 0, + PUTOVFL, dbp->log_fileid, PGNO(pagep), &LSN(pagep), + PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(pagep) = LSN(new_pagep) = new_lsn; + } + NEXT_PGNO(pagep) = PGNO(new_pagep); + PREV_PGNO(new_pagep) = PGNO(pagep); + + if (release) + ret = memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY); + + *pp = new_pagep; + return (ret); +} + +/* + * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t)); + */ +int +__ham_get_cpage(dbc, mode) + DBC *dbc; + db_lockmode_t mode; +{ + DB *dbp; + DB_LOCK tmp_lock; + HASH_CURSOR *hcp; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + ret = 0; + + /* + * There are four cases with respect to buckets and locks. + * 1. If there is no lock held, then if we are locking, we should + * get the lock. + * 2. If there is a lock held, it's for the current bucket, and it's + * for the right mode, we don't need to do anything. + * 3. If there is a lock held for the current bucket but it's not + * strong enough, we need to upgrade. + * 4. If there is a lock, but it's for a different bucket, then we need + * to release the existing lock and get a new lock. + */ + tmp_lock.off = LOCK_INVALID; + if (STD_LOCKING(dbc)) { + if (hcp->lock.off != LOCK_INVALID && + hcp->lbucket != hcp->bucket) { /* Case 4 */ + if (dbc->txn == NULL && + (ret = lock_put(dbp->dbenv, &hcp->lock)) != 0) + return (ret); + hcp->lock.off = LOCK_INVALID; + } + if ((hcp->lock.off != LOCK_INVALID && + (hcp->lock_mode == DB_LOCK_READ && + mode == DB_LOCK_WRITE))) { + /* Case 3. */ + tmp_lock = hcp->lock; + hcp->lock.off = LOCK_INVALID; + } + + /* Acquire the lock. */ + if (hcp->lock.off == LOCK_INVALID) + /* Cases 1, 3, and 4. 
*/ + if ((ret = __ham_lock_bucket(dbc, mode)) != 0) + return (ret); + + if (ret == 0) { + hcp->lock_mode = mode; + hcp->lbucket = hcp->bucket; + if (tmp_lock.off != LOCK_INVALID) + /* Case 3: release the original lock. */ + ret = lock_put(dbp->dbenv, &tmp_lock); + } else if (tmp_lock.off != LOCK_INVALID) + hcp->lock = tmp_lock; + } + + if (ret == 0 && hcp->page == NULL) { + if (hcp->pgno == PGNO_INVALID) + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if ((ret = memp_fget(dbp->mpf, + &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) + return (ret); + } + + return (0); +} + +/* + * Get a new page at the cursor, putting the last page if necessary. + * If the flag is set to H_ISDUP, then we are talking about the + * duplicate page, not the main page. + * + * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int)); + */ +int +__ham_next_cpage(dbc, pgno, dirty) + DBC *dbc; + db_pgno_t pgno; + int dirty; +{ + DB *dbp; + HASH_CURSOR *hcp; + PAGE *p; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (hcp->page != NULL && (ret = memp_fput(dbp->mpf, + hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0) + return (ret); + + if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0) + return (ret); + + hcp->page = p; + hcp->pgno = pgno; + hcp->indx = 0; + + return (0); +} + +/* + * __ham_lock_bucket -- + * Get the lock on a particular bucket. + * + * PUBLIC: int __ham_lock_bucket __P((DBC *, db_lockmode_t)); + */ +int +__ham_lock_bucket(dbc, mode) + DBC *dbc; + db_lockmode_t mode; +{ + HASH_CURSOR *hcp; + u_int32_t flags; + int gotmeta, ret; + + hcp = (HASH_CURSOR *)dbc->internal; + gotmeta = hcp->hdr == NULL ? 1 : 0; + if (gotmeta) + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + dbc->lock.pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (gotmeta) + if ((ret = __ham_release_meta(dbc)) != 0) + return (ret); + + flags = 0; + if (DB_NONBLOCK(dbc)) + LF_SET(DB_LOCK_NOWAIT); + + ret = lock_get(dbc->dbp->dbenv, + dbc->locker, flags, &dbc->lock_dbt, mode, &hcp->lock); + + hcp->lock_mode = mode; + return (ret); +} + +/* + * __ham_dpair -- + * Delete a pair on a page, paying no attention to what the pair + * represents. The caller is responsible for freeing up duplicates + * or offpage entries that might be referenced by this pair. + * + * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); + */ +void +__ham_dpair(dbp, p, indx) + DB *dbp; + PAGE *p; + u_int32_t indx; +{ + db_indx_t delta, n; + u_int8_t *dest, *src; + + /* + * Compute "delta", the amount we have to shift all of the + * offsets. To find the delta, we just need to calculate + * the size of the pair of elements we are removing. + */ + delta = H_PAIRSIZE(p, dbp->pgsize, indx); + + /* + * The hard case: we want to remove something other than + * the last item on the page. We need to shift data and + * offsets down. + */ + if ((db_indx_t)indx != NUM_ENT(p) - 2) { + /* + * Move the data: src is the first occupied byte on + * the page. (Length is delta.) + */ + src = (u_int8_t *)p + HOFFSET(p); + + /* + * Destination is delta bytes beyond src. This might + * be an overlapping copy, so we have to use memmove. + */ + dest = src + delta; + memmove(dest, src, p->inp[H_DATAINDEX(indx)] - HOFFSET(p)); + } + + /* Adjust page metadata. */ + HOFFSET(p) = HOFFSET(p) + delta; + NUM_ENT(p) = NUM_ENT(p) - 2; + + /* Adjust the offsets. 
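+ *
+ * [Editorial illustration with hypothetical offsets.]  Suppose a
+ * 256-byte page holds three pairs, with
+ *
+ *     inp[] = { 240, 230, 200, 180, 150, 140 }, HOFFSET = 140,
+ *
+ * and pair 1 (entries 2 and 3) is deleted.  Its two items span
+ * delta = inp[1] - inp[3] = 230 - 180 = 50 bytes, so the 40 bytes
+ * belonging to entries 4 and 5 slide up by 50, HOFFSET becomes 190,
+ * NUM_ENT drops to 4, and the surviving entries take over slots 2
+ * and 3 in the loop below:
+ *
+ *     inp[] = { 240, 230, 200, 190 }
+ *
+ * i.e. inp[2] = old inp[4] + 50 and inp[3] = old inp[5] + 50.
+ *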
*/ + for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++) + p->inp[n] = p->inp[n + 2] + delta; + +} diff --git a/bdb/hash/hash_rec.c b/bdb/hash/hash_rec.c new file mode 100644 index 00000000000..ded58c281e9 --- /dev/null +++ b/bdb/hash/hash_rec.c @@ -0,0 +1,1078 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_rec.c,v 11.34 2001/01/11 18:19:52 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "btree.h" +#include "hash.h" +#include "lock.h" +#include "log.h" +#include "mp.h" + +static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *)); + +/* + * __ham_insdel_recover -- + * + * PUBLIC: int __ham_insdel_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_insdel_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_insdel_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + u_int32_t opcode; + int cmp_n, cmp_p, flags, getmeta, ret, type; + + COMPQUIET(info, NULL); + + getmeta = 0; + REC_PRINT(__ham_insdel_print); + REC_INTRO(__ham_insdel_read, 1); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. 
+ */ + goto done; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + getmeta = 1; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); + /* + * Two possible things going on: + * redo a delete/undo a put: delete the item from the page. + * redo a put/undo a delete: add the item to the page. + * If we are undoing a delete, then the information logged is the + * entire entry off the page, not just the data of a dbt. In + * this case, we want to copy it back onto the page verbatim. + * We do this by calling __putitem with the type H_OFFPAGE instead + * of H_KEYDATA. + */ + opcode = OPCODE_OF(argp->opcode); + + flags = 0; + if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) || + (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) { + /* + * Need to redo a PUT or undo a delete. If we are undoing a + * delete, we've got to restore the item back to its original + * position. That's a royal pain in the butt (because we do + * not store item lengths on the page), but there's no choice. + */ + if (opcode != DELPAIR || + argp->ndx == (u_int32_t)NUM_ENT(pagep)) { + __ham_putitem(pagep, &argp->key, + DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ? + H_OFFPAGE : H_KEYDATA); + + if (PAIR_ISDATADUP(argp->opcode)) + type = H_DUPLICATE; + else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode)) + type = H_OFFPAGE; + else + type = H_KEYDATA; + __ham_putitem(pagep, &argp->data, type); + } else + (void)__ham_reputpair(pagep, file_dbp->pgsize, + argp->ndx, &argp->key, &argp->data); + + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + flags = DB_MPOOL_DIRTY; + + } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) + || (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { + /* Need to undo a put or redo a delete. */ + __ham_dpair(file_dbp, pagep, argp->ndx); + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + flags = DB_MPOOL_DIRTY; + } + + if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + goto out; + + /* Return the previous LSN. */ +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (getmeta) + (void)__ham_release_meta(dbc); + REC_CLOSE; +} + +/* + * __ham_newpage_recover -- + * This log message is used when we add/remove overflow pages. This + * message takes care of the pointer chains, not the data on the pages. + * + * PUBLIC: int __ham_newpage_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_newpage_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_newpage_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, flags, getmeta, ret; + + COMPQUIET(info, NULL); + + getmeta = 0; + REC_PRINT(__ham_newpage_print); + REC_INTRO(__ham_newpage_read, 1); + + if ((ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. 
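+ *
+ * [Editorial sketch -- the helper and its callbacks are made up;
+ * only the types and macros are real.]  Every recovery function in
+ * this file follows the same compare-and-apply pattern around
+ * cmp_p and cmp_n:
+ *
+ *     static void
+ *     recover_page(PAGE *pagep, DB_LSN *lsnp, DB_LSN *pagelsn,
+ *         db_recops op, void (*redo)(PAGE *), void (*undo)(PAGE *))
+ *     {
+ *             if (log_compare(&LSN(pagep), pagelsn) == 0 &&
+ *                 DB_REDO(op)) {
+ *                     redo(pagep);
+ *                     LSN(pagep) = *lsnp;
+ *             } else if (log_compare(lsnp, &LSN(pagep)) == 0 &&
+ *                 DB_UNDO(op)) {
+ *                     undo(pagep);
+ *                     LSN(pagep) = *pagelsn;
+ *             }
+ *     }
+ *
+ * If the page LSN still equals the pre-change LSN from the log
+ * record, a redo reapplies the change; if it equals the record's
+ * own LSN, an undo rolls it back; otherwise the page is already in
+ * the right state and only the missing-page cases above apply.
+ *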
+ */ + ret = 0; + goto ppage; + } else if ((ret = memp_fget(mpf, &argp->new_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + getmeta = 1; + + /* + * There are potentially three pages we need to check: the one + * that we created/deleted, the one before it and the one after + * it. + */ + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); + + flags = 0; + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + P_INIT(pagep, file_dbp->pgsize, argp->new_pgno, + argp->prev_pgno, argp->next_pgno, 0, P_HASH); + flags = DB_MPOOL_DIRTY; + } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { + /* + * Redo a delete or undo a create new page. All we + * really need to do is change the LSN. + */ + flags = DB_MPOOL_DIRTY; + } + + if (flags) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + + if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + goto out; + + /* Now do the prev page. */ +ppage: if (argp->prev_pgno != PGNO_INVALID) { + if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. + * That is equivalent to having a pagelsn of 0, + * so we would not have to undo anything. In + * this case, don't bother creating a page. + */ + ret = 0; + goto npage; + } else if ((ret = + memp_fget(mpf, &argp->prev_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->prevlsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->prevlsn); + flags = 0; + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + pagep->next_pgno = argp->new_pgno; + flags = DB_MPOOL_DIRTY; + } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { + /* Redo a delete or undo a create new page. */ + pagep->next_pgno = argp->next_pgno; + flags = DB_MPOOL_DIRTY; + } + + if (flags) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; + + if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + goto out; + } + + /* Now time to do the next page */ +npage: if (argp->next_pgno != PGNO_INVALID) { + if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. + * That is equivalent to having a pagelsn of 0, + * so we would not have to undo anything. In + * this case, don't bother creating a page. + */ + goto done; + } else if ((ret = + memp_fget(mpf, &argp->next_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nextlsn); + flags = 0; + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. 
*/ + pagep->prev_pgno = argp->new_pgno; + flags = DB_MPOOL_DIRTY; + } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { + /* Redo a delete or undo a create new page. */ + pagep->prev_pgno = argp->prev_pgno; + flags = DB_MPOOL_DIRTY; + } + + if (flags) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; + + if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + goto out; + } +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (getmeta) + (void)__ham_release_meta(dbc); + REC_CLOSE; +} + +/* + * __ham_replace_recover -- + * This log message refers to partial puts that are local to a single + * page. You can think of them as special cases of the more general + * insdel log message. + * + * PUBLIC: int __ham_replace_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_replace_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_replace_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DBT dbt; + PAGE *pagep; + int32_t grow; + int cmp_n, cmp_p, flags, getmeta, ret; + u_int8_t *hk; + + COMPQUIET(info, NULL); + + getmeta = 0; + REC_PRINT(__ham_replace_print); + REC_INTRO(__ham_replace_read, 1); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + goto done; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + getmeta = 1; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); + + memset(&dbt, 0, sizeof(dbt)); + flags = 0; + grow = 1; + + if (cmp_p == 0 && DB_REDO(op)) { + /* Reapply the change as specified. */ + dbt.data = argp->newitem.data; + dbt.size = argp->newitem.size; + grow = argp->newitem.size - argp->olditem.size; + LSN(pagep) = *lsnp; + flags = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the already applied change. 
*/ + dbt.data = argp->olditem.data; + dbt.size = argp->olditem.size; + grow = argp->olditem.size - argp->newitem.size; + LSN(pagep) = argp->pagelsn; + flags = DB_MPOOL_DIRTY; + } + + if (flags) { + __ham_onpage_replace(pagep, + file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt); + if (argp->makedup) { + hk = P_ENTRY(pagep, argp->ndx); + if (DB_REDO(op)) + HPAGE_PTYPE(hk) = H_DUPLICATE; + else + HPAGE_PTYPE(hk) = H_KEYDATA; + } + } + + if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (getmeta) + (void)__ham_release_meta(dbc); + REC_CLOSE; +} + +/* + * __ham_splitdata_recover -- + * + * PUBLIC: int __ham_splitdata_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_splitdata_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_splitdata_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, flags, getmeta, ret; + + COMPQUIET(info, NULL); + + getmeta = 0; + REC_PRINT(__ham_splitdata_print); + REC_INTRO(__ham_splitdata_read, 1); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + goto done; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + getmeta = 1; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); + + /* + * There are two types of log messages here, one for the old page + * and one for the new pages created. The original image in the + * SPLITOLD record is used for undo. The image in the SPLITNEW + * is used for redo. We should never have a case where there is + * a redo operation and the SPLITOLD record is on disk, but not + * the SPLITNEW record. Therefore, we only have work to do when + * redo NEW messages and undo OLD messages, but we have to update + * LSNs in both cases. + */ + flags = 0; + if (cmp_p == 0 && DB_REDO(op)) { + if (argp->opcode == SPLITNEW) + /* Need to redo the split described. */ + memcpy(pagep, argp->pageimage.data, + argp->pageimage.size); + LSN(pagep) = *lsnp; + flags = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && DB_UNDO(op)) { + if (argp->opcode == SPLITOLD) { + /* Put back the old image. */ + memcpy(pagep, argp->pageimage.data, + argp->pageimage.size); + } else + P_INIT(pagep, file_dbp->pgsize, argp->pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN(pagep) = argp->pagelsn; + flags = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (getmeta) + (void)__ham_release_meta(dbc); + REC_CLOSE; +} + +/* + * __ham_copypage_recover -- + * Recovery function for copypage. 
+ * + * PUBLIC: int __ham_copypage_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_copypage_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_copypage_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, flags, getmeta, ret; + + COMPQUIET(info, NULL); + + getmeta = 0; + REC_PRINT(__ham_copypage_print); + REC_INTRO(__ham_copypage_read, 1); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + getmeta = 1; + flags = 0; + + /* This is the bucket page. */ + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + ret = 0; + goto donext; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); + + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + memcpy(pagep, argp->page.data, argp->page.size); + PGNO(pagep) = argp->pgno; + PREV_PGNO(pagep) = PGNO_INVALID; + LSN(pagep) = *lsnp; + flags = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, + argp->next_pgno, 0, P_HASH); + LSN(pagep) = argp->pagelsn; + flags = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(mpf, pagep, flags)) != 0) + goto out; + +donext: /* Now fix up the "next" page. */ + if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + ret = 0; + goto do_nn; + } else if ((ret = memp_fget(mpf, &argp->next_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + /* For REDO just update the LSN. For UNDO copy page back. */ + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nextlsn); + flags = 0; + if (cmp_p == 0 && DB_REDO(op)) { + LSN(pagep) = *lsnp; + flags = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + memcpy(pagep, argp->page.data, argp->page.size); + flags = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(mpf, pagep, flags)) != 0) + goto out; + + /* Now fix up the next's next page. */ +do_nn: if (argp->nnext_pgno == PGNO_INVALID) + goto done; + + if ((ret = memp_fget(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + goto done; + } else if ((ret = memp_fget(mpf, &argp->nnext_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nnextlsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nnextlsn); + + flags = 0; + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. 
*/ + PREV_PGNO(pagep) = argp->pgno; + LSN(pagep) = *lsnp; + flags = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + PREV_PGNO(pagep) = argp->next_pgno; + LSN(pagep) = argp->nnextlsn; + flags = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(mpf, pagep, flags)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (getmeta) + (void)__ham_release_meta(dbc); + REC_CLOSE; +} + +/* + * __ham_metagroup_recover -- + * Recovery function for metagroup. + * + * PUBLIC: int __ham_metagroup_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_metagroup_args *argp; + HASH_CURSOR *hcp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t last_pgno; + int cmp_n, cmp_p, flags, groupgrow, ret; + + COMPQUIET(info, NULL); + REC_PRINT(__ham_metagroup_print); + REC_INTRO(__ham_metagroup_read, 1); + + /* + * This logs the virtual create of pages pgno to pgno + bucket + * Since the mpool page-allocation is not really able to be + * transaction protected, we can never undo it. Even in an abort, + * we have to allocate these pages to the hash table. + * The log record contains: + * bucket: new bucket being allocated. + * pgno: page number of the new bucket. + * if bucket is a power of 2, then we allocated a whole batch of + * pages; if it's not, then we simply allocated one new page. + */ + groupgrow = + (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1; + + last_pgno = argp->pgno; + if (groupgrow) + /* Read the last page. */ + last_pgno += argp->bucket; + + if ((ret = memp_fget(mpf, &last_pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); + + flags = 0; + if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) { + /* + * We need to make sure that we redo the allocation of the + * pages. + */ + if (DB_REDO(op)) + pagep->lsn = *lsnp; + else + pagep->lsn = argp->pagelsn; + flags = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(mpf, pagep, flags)) != 0) + goto out; + + /* Now we have to update the meta-data page. */ + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + cmp_n = log_compare(lsnp, &hcp->hdr->dbmeta.lsn); + cmp_p = log_compare(&hcp->hdr->dbmeta.lsn, &argp->metalsn); + CHECK_LSN(op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); + if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) { + if (DB_REDO(op)) { + /* Redo the actual updating of bucket counts. */ + ++hcp->hdr->max_bucket; + if (groupgrow) { + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = + (argp->bucket + 1) | hcp->hdr->low_mask; + } + hcp->hdr->dbmeta.lsn = *lsnp; + } else { + /* Undo the actual updating of bucket counts. 
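+ *
+ * [Editorial illustration with hypothetical values, reading
+ * argp->bucket as the pre-split maximum bucket, so bucket + 1 is
+ * the first bucket of the new doubling.]  Growing a table whose
+ * buckets are 0..3 (low_mask 0x1, high_mask 0x3) into bucket 4
+ * starts a new doubling, so on redo:
+ *
+ *     max_bucket: 3 -> 4
+ *     low_mask:   0x1 -> old high_mask           = 0x3
+ *     high_mask:  0x3 -> (bucket + 1) | low_mask = 0x7
+ *
+ * and the undo below reverses it from the post-split state:
+ *
+ *     max_bucket: 4 -> 3
+ *     high_mask:  0x7 -> low_mask       = 0x3
+ *     low_mask:   0x3 -> high_mask >> 1 = 0x1
+ *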
*/ + --hcp->hdr->max_bucket; + if (groupgrow) { + hcp->hdr->high_mask = hcp->hdr->low_mask; + hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; + } + hcp->hdr->dbmeta.lsn = argp->metalsn; + } + if (groupgrow && + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == + PGNO_INVALID) + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = + argp->pgno - argp->bucket - 1; + F_SET(hcp, H_DIRTY); + } + if ((ret = __ham_release_meta(dbc)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_groupalloc_recover -- + * Recover the batch creation of a set of pages for a new database. + * + * PUBLIC: int __ham_groupalloc_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_groupalloc_args *argp; + DBMETA *mmeta; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + db_pgno_t pgno; + int cmp_n, cmp_p, flags, ret; + + REC_PRINT(__ham_groupalloc_print); + REC_INTRO(__ham_groupalloc_read, 0); + + pgno = PGNO_BASE_MD; + if ((ret = memp_fget(mpf, &pgno, 0, &mmeta)) != 0) { + if (DB_REDO(op)) { + /* Page should have existed. */ + (void)__db_pgerr(file_dbp, pgno); + goto out; + } else { + ret = 0; + goto done; + } + } + + cmp_n = log_compare(lsnp, &LSN(mmeta)); + cmp_p = log_compare(&LSN(mmeta), &argp->meta_lsn); + CHECK_LSN(op, cmp_p, &LSN(mmeta), &argp->meta_lsn); + + /* + * Basically, we used mpool to allocate a chunk of pages. + * We need to either add those to a free list (in the undo + * case) or initialize them (in the redo case). + * + * If we are redoing and this is a hash subdatabase, it's possible + * that the pages were never allocated, so we'd better check for + * that and handle it here. + */ + + flags = 0; + if (DB_REDO(op)) { + if ((ret = __ham_alloc_pages(file_dbp, argp)) != 0) + goto out1; + if (cmp_p == 0) { + LSN(mmeta) = *lsnp; + flags = DB_MPOOL_DIRTY; + } + } + + /* + * Always put the pages into the limbo list and free them later. + */ + else if (DB_UNDO(op)) { + if ((ret = __db_add_limbo(dbenv, + info, argp->fileid, argp->start_pgno, argp->num)) != 0) + goto out; + if (cmp_n == 0) { + LSN(mmeta) = argp->meta_lsn; + flags = DB_MPOOL_DIRTY; + } + } + +out1: if ((ret = memp_fput(mpf, mmeta, flags)) != 0) + goto out; + +done: if (ret == 0) + *lsnp = argp->prev_lsn; + +out: REC_CLOSE; +} + +/* + * __ham_alloc_pages -- + * + * Called during redo of a file create. We create new pages in the file + * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a + * __crdel_metasub message. If we manage to crash without the newly written + * pages getting to disk (I'm not sure this can happen anywhere except our + * test suite?!), then we need to go through a recreate the final pages. + * Hash normally has holes in its files and handles them appropriately. + */ +static int +__ham_alloc_pages(dbp, argp) + DB *dbp; + __ham_groupalloc_args *argp; +{ + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t pgno; + int ret; + + mpf = dbp->mpf; + + /* Read the last page of the allocation. */ + pgno = argp->start_pgno + argp->num - 1; + + /* If the page exists, and it has been initialized, then we're done. */ + if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) == 0) { + if ((pagep->type == P_INVALID) && IS_ZERO_LSN(pagep->lsn)) + goto reinit_page; + if ((ret = memp_fput(mpf, pagep, 0)) != 0) + return (ret); + return (0); + } + + /* + * Had to create the page. 
On some systems (read "Windows"), + * you can find random garbage on pages to which you haven't + * yet written. So, we have an os layer that will do the + * right thing for group allocations. We call that directly + * to make sure all the pages are allocated and then continue + * merrily on our way with normal recovery. + */ + if ((ret = __os_fpinit(dbp->dbenv, &mpf->fh, + argp->start_pgno, argp->num, dbp->pgsize)) != 0) + return (ret); + + if ((ret = memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) { + (void)__db_pgerr(dbp, pgno); + return (ret); + } + +reinit_page: + /* Initialize the newly allocated page. */ + P_INIT(pagep, + dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + ZERO_LSN(pagep->lsn); + + if ((ret = memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0) + return (ret); + + return (0); +} + +/* + * __ham_curadj_recover -- + * Undo cursor adjustments if a subtransaction fails. + * + * PUBLIC: int __ham_curadj_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ + +int +__ham_curadj_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_curadj_args *argp; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + int ret; + HASH_CURSOR *hcp; + + REC_PRINT(__ham_groupalloc_print); + + ret = 0; + if (op != DB_TXN_ABORT) + goto done; + REC_INTRO(__ham_curadj_read, 0); + + COMPQUIET(info, NULL); + /* + * Undo the adjustment by reinitializing the the cursor + * to look like the one that was used to do the adustment, + * then we invert the add so that undo the adjustment. + */ + hcp = (HASH_CURSOR *)dbc->internal; + hcp->pgno = argp->pgno; + hcp->indx = argp->indx; + hcp->dup_off = argp->dup_off; + hcp->order = argp->order; + if (!argp->add) + F_SET(hcp, H_DELETED); + (void)__ham_c_update(dbc, argp->len, !argp->add, argp->is_dup); + +done: *lsnp = argp->prev_lsn; +out: REC_CLOSE; +} + +/* + * __ham_chgpg_recover -- + * Undo cursor adjustments if a subtransaction fails. 
+ * + * PUBLIC: int __ham_chgpg_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ + +int +__ham_chgpg_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_chgpg_args *argp; + BTREE_CURSOR *opdcp; + DB_MPOOLFILE *mpf; + DB *file_dbp, *ldbp; + DBC *dbc; + int ret; + DBC *cp; + HASH_CURSOR *lcp; + + REC_PRINT(__ham_chgpg_print); + + ret = 0; + if (op != DB_TXN_ABORT) + goto out; + REC_INTRO(__ham_chgpg_read, 0); + + COMPQUIET(info, NULL); + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + for (ldbp = __dblist_get(dbenv, file_dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == file_dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, file_dbp->mutexp); + + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + lcp = (HASH_CURSOR *)cp->internal; + + switch (argp->mode) { + case DB_HAM_CHGPG: + if (lcp->pgno != argp->new_pgno) + break; + + if (argp->old_indx == NDX_INVALID) + lcp->pgno = argp->old_pgno; + else if (lcp->indx == argp->new_indx) { + lcp->indx = argp->old_indx; + lcp->pgno = argp->old_pgno; + } + break; + + case DB_HAM_SPLIT: + if (lcp->pgno == argp->new_pgno + && lcp->indx == argp->new_indx) { + lcp->indx = argp->old_indx; + lcp->pgno = argp->old_pgno; + } + break; + + case DB_HAM_DUP: + if (lcp->opd != NULL) { + opdcp = + (BTREE_CURSOR *)lcp->opd->internal; + if (opdcp->pgno == argp->new_pgno && + opdcp->indx == argp->new_indx) { + if (F_ISSET(opdcp, C_DELETED)) + F_SET(lcp, H_DELETED); + if ((ret = + lcp->opd->c_close( + lcp->opd)) != 0) + goto out; + lcp->opd = NULL; + } + } + break; + } + } + + MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + +done: *lsnp = argp->prev_lsn; + ret = 0; +out: REC_CLOSE; +} diff --git a/bdb/hash/hash_reclaim.c b/bdb/hash/hash_reclaim.c new file mode 100644 index 00000000000..8857c5406a4 --- /dev/null +++ b/bdb/hash/hash_reclaim.c @@ -0,0 +1,68 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_reclaim.c,v 11.4 2000/11/30 00:58:37 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "hash.h" +#include "lock.h" + +/* + * __ham_reclaim -- + * Reclaim the pages from a subdatabase and return them to the + * parent free list. For now, we link each freed page on the list + * separately. If people really store hash databases in subdatabases + * and do a lot of creates and deletes, this is going to be a problem, + * because hash needs chunks of contiguous storage. We may eventually + * need to go to a model where we maintain the free list with chunks of + * contiguous pages as well. + * + * PUBLIC: int __ham_reclaim __P((DB *, DB_TXN *txn)); + */ +int +__ham_reclaim(dbp, txn) + DB *dbp; + DB_TXN *txn; +{ + DBC *dbc; + HASH_CURSOR *hcp; + int ret; + + /* Open up a cursor that we'll use for traversing. 
*/ + if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) + return (ret); + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err; + + if ((ret = __ham_traverse(dbp, + dbc, DB_LOCK_WRITE, __db_reclaim_callback, dbc)) != 0) + goto err; + if ((ret = dbc->c_close(dbc)) != 0) + goto err; + if ((ret = __ham_release_meta(dbc)) != 0) + goto err; + return (0); + +err: if (hcp->hdr != NULL) + (void)__ham_release_meta(dbc); + (void)dbc->c_close(dbc); + return (ret); +} diff --git a/bdb/hash/hash_stat.c b/bdb/hash/hash_stat.c new file mode 100644 index 00000000000..ed64bbc68bd --- /dev/null +++ b/bdb/hash/hash_stat.c @@ -0,0 +1,329 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_stat.c,v 11.24 2000/12/21 21:54:35 margo Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "btree.h" +#include "hash.h" +#include "lock.h" + +static int __ham_stat_callback __P((DB *, PAGE *, void *, int *)); + +/* + * __ham_stat -- + * Gather/print the hash statistics + * + * PUBLIC: int __ham_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); + */ +int +__ham_stat(dbp, spp, db_malloc, flags) + DB *dbp; + void *spp, *(*db_malloc) __P((size_t)); + u_int32_t flags; +{ + DB_HASH_STAT *sp; + HASH_CURSOR *hcp; + DBC *dbc; + PAGE *h; + db_pgno_t pgno; + int ret; + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); + + sp = NULL; + + /* Check for invalid flags. */ + if ((ret = __db_statchk(dbp, flags)) != 0) + return (ret); + + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) + return (ret); + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err; + + /* Allocate and clear the structure. */ + if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0) + goto err; + memset(sp, 0, sizeof(*sp)); + if (flags == DB_CACHED_COUNTS) { + sp->hash_nkeys = hcp->hdr->dbmeta.key_count; + sp->hash_ndata = hcp->hdr->dbmeta.record_count; + goto done; + } + + /* Copy the fields that we have. */ + sp->hash_pagesize = dbp->pgsize; + sp->hash_buckets = hcp->hdr->max_bucket + 1; + sp->hash_magic = hcp->hdr->dbmeta.magic; + sp->hash_version = hcp->hdr->dbmeta.version; + sp->hash_metaflags = hcp->hdr->dbmeta.flags; + sp->hash_nelem = hcp->hdr->nelem; + sp->hash_ffactor = hcp->hdr->ffactor; + + /* Walk the free list, counting pages. */ + for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free; + pgno != PGNO_INVALID;) { + ++sp->hash_free; + + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + goto err; + + pgno = h->next_pgno; + (void)memp_fput(dbp->mpf, h, 0); + } + + /* Now traverse the rest of the table. 
*/ + if ((ret = __ham_traverse(dbp, + dbc, DB_LOCK_READ, __ham_stat_callback, sp)) != 0) + goto err; + + if (!F_ISSET(dbp, DB_AM_RDONLY)) { + if ((ret = __ham_dirty_meta(dbc)) != 0) + goto err; + hcp->hdr->dbmeta.key_count = sp->hash_nkeys; + hcp->hdr->dbmeta.record_count = sp->hash_ndata; + } + +done: + if ((ret = __ham_release_meta(dbc)) != 0) + goto err; + if ((ret = dbc->c_close(dbc)) != 0) + goto err; + + *(DB_HASH_STAT **)spp = sp; + return (0); + +err: if (sp != NULL) + __os_free(sp, sizeof(*sp)); + if (hcp->hdr != NULL) + (void)__ham_release_meta(dbc); + (void)dbc->c_close(dbc); + return (ret); + +} + +/* + * __ham_traverse + * Traverse an entire hash table. We use the callback so that we + * can use this both for stat collection and for deallocation. + * + * PUBLIC: int __ham_traverse __P((DB *, DBC *, db_lockmode_t, + * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *)); + */ +int +__ham_traverse(dbp, dbc, mode, callback, cookie) + DB *dbp; + DBC *dbc; + db_lockmode_t mode; + int (*callback) __P((DB *, PAGE *, void *, int *)); + void *cookie; +{ + HASH_CURSOR *hcp; + HKEYDATA *hk; + DBC *opd; + db_pgno_t pgno, opgno; + u_int32_t bucket; + int did_put, i, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + opd = NULL; + ret = 0; + + /* + * In a perfect world, we could simply read each page in the file + * and look at its page type to tally the information necessary. + * Unfortunately, the bucket locking that hash tables do to make + * locking easy, makes this a pain in the butt. We have to traverse + * duplicate, overflow and big pages from the bucket so that we + * don't access anything that isn't properly locked. + */ + for (bucket = 0; bucket <= hcp->hdr->max_bucket; bucket++) { + hcp->bucket = bucket; + hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket); + for (ret = __ham_get_cpage(dbc, mode); ret == 0; + ret = __ham_next_cpage(dbc, pgno, 0)) { + pgno = NEXT_PGNO(hcp->page); + + /* + * Go through each item on the page checking for + * duplicates (in which case we have to count the + * duplicate pages) or big key/data items (in which + * case we have to count those pages). + */ + for (i = 0; i < NUM_ENT(hcp->page); i++) { + hk = (HKEYDATA *)P_ENTRY(hcp->page, i); + switch (HPAGE_PTYPE(hk)) { + case H_OFFDUP: + memcpy(&opgno, HOFFDUP_PGNO(hk), + sizeof(db_pgno_t)); + if ((ret = __db_c_newopd(dbc, + opgno, &opd)) != 0) + return (ret); + if ((ret = __bam_traverse(opd, + DB_LOCK_READ, opgno, + __ham_stat_callback, cookie)) + != 0) + goto err; + if ((ret = opd->c_close(opd)) != 0) + return (ret); + opd = NULL; + break; + case H_OFFPAGE: + /* + * We are about to get a big page + * which will use the same spot that + * the current page uses, so we need + * to restore the current page before + * looking at it again. + */ + memcpy(&opgno, HOFFPAGE_PGNO(hk), + sizeof(db_pgno_t)); + if ((ret = __db_traverse_big(dbp, + opgno, callback, cookie)) != 0) + goto err; + break; + case H_KEYDATA: + break; + } + } + + /* Call the callback on main pages. 
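+ *
+ * [Editorial sketch -- count_pages() is a made-up example, not part
+ * of this file.]  Callbacks passed to __ham_traverse all have the
+ * shape used here and by __ham_stat_callback and
+ * __db_reclaim_callback: they receive each page plus a caller
+ * cookie, and set *putp if they disposed of the page themselves.
+ *
+ *     static int
+ *     count_pages(DB *dbp, PAGE *pagep, void *cookie, int *putp)
+ *     {
+ *             COMPQUIET(dbp, NULL);
+ *             COMPQUIET(pagep, NULL);
+ *             ++*(u_int32_t *)cookie;
+ *             *putp = 0;
+ *             return (0);
+ *     }
+ *
+ * wired up, for example, as
+ *
+ *     u_int32_t count = 0;
+ *     ret = __ham_traverse(dbp, dbc,
+ *         DB_LOCK_READ, count_pages, &count);
+ *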
*/ + if ((ret = callback(dbp, + hcp->page, cookie, &did_put)) != 0) + goto err; + + if (did_put) + hcp->page = NULL; + if (pgno == PGNO_INVALID) + break; + } + if (ret != 0) + goto err; + + if (STD_LOCKING(dbc)) + (void)lock_put(dbp->dbenv, &hcp->lock); + + if (hcp->page != NULL) { + if ((ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0) + return (ret); + hcp->page = NULL; + } + + } +err: if (opd != NULL && + (t_ret = opd->c_close(opd)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int +__ham_stat_callback(dbp, pagep, cookie, putp) + DB *dbp; + PAGE *pagep; + void *cookie; + int *putp; +{ + DB_HASH_STAT *sp; + DB_BTREE_STAT bstat; + db_indx_t indx, len, off, tlen, top; + u_int8_t *hk; + + *putp = 0; + sp = cookie; + + switch (pagep->type) { + case P_INVALID: + /* + * Hash pages may be wholly zeroed; this is not a bug. + * Obviously such pages have no data, so we can just proceed. + */ + break; + case P_HASH: + /* + * We count the buckets and the overflow pages + * separately and tally their bytes separately + * as well. We need to figure out if this page + * is a bucket. + */ + if (PREV_PGNO(pagep) == PGNO_INVALID) + sp->hash_bfree += P_FREESPACE(pagep); + else { + sp->hash_overflows++; + sp->hash_ovfl_free += P_FREESPACE(pagep); + } + top = NUM_ENT(pagep); + /* Correct for on-page duplicates and deleted items. */ + for (indx = 0; indx < top; indx += P_INDX) { + switch (*H_PAIRDATA(pagep, indx)) { + case H_OFFDUP: + case H_OFFPAGE: + break; + case H_KEYDATA: + sp->hash_ndata++; + break; + case H_DUPLICATE: + tlen = LEN_HDATA(pagep, 0, indx); + hk = H_PAIRDATA(pagep, indx); + for (off = 0; off < tlen; + off += len + 2 * sizeof (db_indx_t)) { + sp->hash_ndata++; + memcpy(&len, + HKEYDATA_DATA(hk) + + off, sizeof(db_indx_t)); + } + } + } + sp->hash_nkeys += H_NUMPAIRS(pagep); + break; + case P_IBTREE: + case P_IRECNO: + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + /* + * These are all btree pages; get a correct + * cookie and call them. Then add appropriate + * fields into our stat structure. + */ + memset(&bstat, 0, sizeof(bstat)); + bstat.bt_dup_pgfree = 0; + bstat.bt_int_pgfree = 0; + bstat.bt_leaf_pgfree = 0; + bstat.bt_ndata = 0; + __bam_stat_callback(dbp, pagep, &bstat, putp); + sp->hash_dup++; + sp->hash_dup_free += bstat.bt_leaf_pgfree + + bstat.bt_dup_pgfree + bstat.bt_int_pgfree; + sp->hash_ndata += bstat.bt_ndata; + break; + case P_OVERFLOW: + sp->hash_bigpages++; + sp->hash_big_bfree += P_OVFLSPACE(dbp->pgsize, pagep); + break; + default: + return (__db_unknown_type(dbp->dbenv, + "__ham_stat_callback", pagep->type)); + } + + return (0); +} diff --git a/bdb/hash/hash_upgrade.c b/bdb/hash/hash_upgrade.c new file mode 100644 index 00000000000..c34381276b4 --- /dev/null +++ b/bdb/hash/hash_upgrade.c @@ -0,0 +1,271 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_upgrade.c,v 11.25 2000/12/14 19:18:32 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <limits.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_swap.h" +#include "hash.h" +#include "db_upgrade.h" + +/* + * __ham_30_hashmeta -- + * Upgrade the database from version 4/5 to version 6. 
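+ *	In outline, as implemented below: bump the meta version to 6,
+ *	record an explicit page type (P_HASHMETA), carry the free list over
+ *	under its new name, clamp a possibly-corrupt nelem left behind by
+ *	the 2.X bug, rewrite the spares array to its new meaning, and
+ *	replace the unique file ID.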
+ * + * PUBLIC: int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); + */ +int +__ham_30_hashmeta(dbp, real_name, obuf) + DB *dbp; + char *real_name; + u_int8_t *obuf; +{ + DB_ENV *dbenv; + HASHHDR *oldmeta; + HMETA30 newmeta; + u_int32_t *o_spares, *n_spares; + u_int32_t fillf, maxb, nelem; + int i, max_entry, ret; + + dbenv = dbp->dbenv; + memset(&newmeta, 0, sizeof(newmeta)); + + oldmeta = (HASHHDR *)obuf; + + /* + * The first 32 bytes are similar. The only change is the version + * and that we removed the ovfl_point and have the page type now. + */ + + newmeta.dbmeta.lsn = oldmeta->lsn; + newmeta.dbmeta.pgno = oldmeta->pgno; + newmeta.dbmeta.magic = oldmeta->magic; + newmeta.dbmeta.version = 6; + newmeta.dbmeta.pagesize = oldmeta->pagesize; + newmeta.dbmeta.type = P_HASHMETA; + + /* Move flags */ + newmeta.dbmeta.flags = oldmeta->flags; + + /* Copy the free list, which has changed its name but works the same. */ + newmeta.dbmeta.free = oldmeta->last_freed; + + /* Copy: max_bucket, high_mask, low-mask, ffactor, nelem, h_charkey */ + newmeta.max_bucket = oldmeta->max_bucket; + newmeta.high_mask = oldmeta->high_mask; + newmeta.low_mask = oldmeta->low_mask; + newmeta.ffactor = oldmeta->ffactor; + newmeta.nelem = oldmeta->nelem; + newmeta.h_charkey = oldmeta->h_charkey; + + /* + * There was a bug in 2.X versions where the nelem could go negative. + * In general, this is considered "bad." If it does go negative + * (that is, very large and positive), we'll die trying to dump and + * load this database. So, let's see if we can fix it here. + */ + nelem = newmeta.nelem; + fillf = newmeta.ffactor; + maxb = newmeta.max_bucket; + + if ((fillf != 0 && fillf * maxb < 2 * nelem) || + (fillf == 0 && nelem > 0x8000000)) + newmeta.nelem = 0; + + /* + * We now have to convert the spares array. The old spares array + * contained the total number of extra pages allocated prior to + * the bucket that begins the next doubling. The new spares array + * contains the page number of the first bucket in the next doubling + * MINUS the bucket number of that bucket. + */ + o_spares = oldmeta->spares; + n_spares = newmeta.spares; + max_entry = __db_log2(maxb + 1); /* highest spares entry in use */ + n_spares[0] = 1; + for (i = 1; i < NCACHED && i <= max_entry; i++) + n_spares[i] = 1 + o_spares[i - 1]; + + /* Replace the unique ID. */ + if ((ret = __os_fileid(dbenv, real_name, 1, newmeta.dbmeta.uid)) != 0) + return (ret); + + /* Overwrite the original. */ + memcpy(oldmeta, &newmeta, sizeof(newmeta)); + + return (0); +} + +/* + * __ham_30_sizefix -- + * Make sure that all hash pages belonging to the current + * hash doubling are within the bounds of the file. + * + * PUBLIC: int __ham_30_sizefix __P((DB *, DB_FH *, char *, u_int8_t *)); + */ +int +__ham_30_sizefix(dbp, fhp, realname, metabuf) + DB *dbp; + DB_FH *fhp; + char *realname; + u_int8_t *metabuf; +{ + u_int8_t buf[DB_MAX_PGSIZE]; + DB_ENV *dbenv; + HMETA30 *meta; + db_pgno_t last_actual, last_desired; + int ret; + size_t nw; + u_int32_t pagesize; + + dbenv = dbp->dbenv; + memset(buf, 0, DB_MAX_PGSIZE); + + meta = (HMETA30 *)metabuf; + pagesize = meta->dbmeta.pagesize; + + /* + * Get the last page number. To do this, we'll need dbp->pgsize + * to be set right, so slam it into place. + */ + dbp->pgsize = pagesize; + if ((ret = __db_lastpgno(dbp, realname, fhp, &last_actual)) != 0) + return (ret); + + /* + * The last bucket in the doubling is equal to high_mask; calculate + * the page number that implies. 
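+	 * BS_TO_PAGE turns a bucket number into a page number by adding the
+	 * spares offset for that bucket's doubling (the convention set up in
+	 * __ham_30_hashmeta above).  Purely illustrative numbers: with
+	 * high_mask 0x7 and a spares entry of 2 for that doubling, the last
+	 * bucket (7) would live on page 9, so the file must reach at least
+	 * that far.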
+ */ + last_desired = BS_TO_PAGE(meta->high_mask, meta->spares); + + /* + * If last_desired > last_actual, we need to grow the file. Write + * a zeroed page where last_desired would go. + */ + if (last_desired > last_actual) { + if ((ret = __os_seek(dbenv, + fhp, pagesize, last_desired, 0, 0, DB_OS_SEEK_SET)) != 0) + return (ret); + if ((ret = __os_write(dbenv, fhp, buf, pagesize, &nw)) != 0) + return (ret); + if (nw != pagesize) { + __db_err(dbenv, "Short write during upgrade"); + return (EIO); + } + } + + return (0); +} + +/* + * __ham_31_hashmeta -- + * Upgrade the database from version 6 to version 7. + * + * PUBLIC: int __ham_31_hashmeta + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HMETA31 *newmeta; + HMETA30 *oldmeta; + + COMPQUIET(dbp, NULL); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + + newmeta = (HMETA31 *)h; + oldmeta = (HMETA30 *)h; + + /* + * Copy the fields down the page. + * The fields may overlap so start at the bottom and use memmove(). + */ + memmove(newmeta->spares, oldmeta->spares, sizeof(oldmeta->spares)); + newmeta->h_charkey = oldmeta->h_charkey; + newmeta->nelem = oldmeta->nelem; + newmeta->ffactor = oldmeta->ffactor; + newmeta->low_mask = oldmeta->low_mask; + newmeta->high_mask = oldmeta->high_mask; + newmeta->max_bucket = oldmeta->max_bucket; + memmove(newmeta->dbmeta.uid, + oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); + newmeta->dbmeta.flags = oldmeta->dbmeta.flags; + newmeta->dbmeta.record_count = 0; + newmeta->dbmeta.key_count = 0; + ZERO_LSN(newmeta->dbmeta.unused3); + + /* Update the version. */ + newmeta->dbmeta.version = 7; + + /* Upgrade the flags. */ + if (LF_ISSET(DB_DUPSORT)) + F_SET(&newmeta->dbmeta, DB_HASH_DUPSORT); + + *dirtyp = 1; + return (0); +} + +/* + * __ham_31_hash -- + * Upgrade the database hash leaf pages. + * + * PUBLIC: int __ham_31_hash + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HKEYDATA *hk; + db_pgno_t pgno, tpgno; + db_indx_t indx; + int ret; + + COMPQUIET(flags, 0); + + ret = 0; + for (indx = 0; indx < NUM_ENT(h); indx += 2) { + hk = (HKEYDATA *)H_PAIRDATA(h, indx); + if (HPAGE_PTYPE(hk) == H_OFFDUP) { + memcpy(&pgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); + tpgno = pgno; + if ((ret = __db_31_offdup(dbp, real_name, fhp, + LF_ISSET(DB_DUPSORT) ? 1 : 0, &tpgno)) != 0) + break; + if (pgno != tpgno) { + *dirtyp = 1; + memcpy(HOFFDUP_PGNO(hk), + &tpgno, sizeof(db_pgno_t)); + } + } + } + + return (ret); +} diff --git a/bdb/hash/hash_verify.c b/bdb/hash/hash_verify.c new file mode 100644 index 00000000000..31dd7cc2299 --- /dev/null +++ b/bdb/hash/hash_verify.c @@ -0,0 +1,1051 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ * + * $Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell Exp $ + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_verify.h" +#include "btree.h" +#include "hash.h" + +static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); +static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, + u_int32_t)); +static int __ham_vrfy_item __P((DB *, + VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); + +/* + * __ham_vrfy_meta -- + * Verify the hash-specific part of a metadata page. + * + * Note that unlike btree, we don't save things off, because we + * will need most everything again to verify each page and the + * amount of state here is significant. + * + * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__ham_vrfy_meta(dbp, vdp, m, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *m; + db_pgno_t pgno; + u_int32_t flags; +{ + HASH *hashp; + VRFY_PAGEINFO *pip; + int i, ret, t_ret, isbad; + u_int32_t pwr, mbucket; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + isbad = 0; + + hashp = dbp->h_internal; + + if (hashp != NULL && hashp->h_hash != NULL) + hfunc = hashp->h_hash; + else + hfunc = __ham_func5; + + /* + * If we haven't already checked the common fields in pagezero, + * check them. + */ + if (!F_ISSET(pip, VRFY_INCOMPLETE) && + (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* h_charkey */ + if (!LF_ISSET(DB_NOORDERCHK)) + if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { + EPRINT((dbp->dbenv, +"Database has different custom hash function; reverify with DB_NOORDERCHK set" + )); + /* + * Return immediately; this is probably a sign + * of user error rather than database corruption, so + * we want to avoid extraneous errors. + */ + isbad = 1; + goto err; + } + + /* max_bucket must be less than the last pgno. */ + if (m->max_bucket > vdp->last_pgno) { + EPRINT((dbp->dbenv, + "Impossible max_bucket %lu on meta page %lu", + m->max_bucket, pgno)); + /* + * Most other fields depend somehow on max_bucket, so + * we just return--there will be lots of extraneous + * errors. + */ + isbad = 1; + goto err; + } + + /* + * max_bucket, high_mask and low_mask: high_mask must be one + * less than the next power of two above max_bucket, and + * low_mask must be one less than the power of two below it. + * + * + */ + pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1); + if (m->high_mask != pwr - 1) { + EPRINT((dbp->dbenv, + "Incorrect high_mask %lu on page %lu, should be %lu", + m->high_mask, pgno, pwr - 1)); + isbad = 1; + } + pwr >>= 1; + if (m->low_mask != pwr - 1) { + EPRINT((dbp->dbenv, + "Incorrect low_mask %lu on page %lu, should be %lu", + m->low_mask, pgno, pwr - 1)); + isbad = 1; + } + + /* ffactor: no check possible. */ + pip->h_ffactor = m->ffactor; + + /* + * nelem: just make sure it's not astronomical for now. This is the + * same check that hash_upgrade does, since there was a bug in 2.X + * which could make nelem go "negative". 
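+	 * A "negative" count stored in the unsigned field simply shows up
+	 * as an enormous value, so anything above the cutoff below is
+	 * flagged and then treated as zero rather than trusted.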
+ */ + if (m->nelem > 0x80000000) { + EPRINT((dbp->dbenv, + "Suspiciously high nelem of %lu on page %lu", + m->nelem, pgno)); + isbad = 1; + pip->h_nelem = 0; + } else + pip->h_nelem = m->nelem; + + /* flags */ + if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) + F_SET(pip, VRFY_HAS_DUPS); + if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) + F_SET(pip, VRFY_HAS_DUPSORT); + /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ + + /* spares array */ + for (i = 0; m->spares[i] != 0 && i < NCACHED; i++) { + /* + * We set mbucket to the maximum bucket that would use a given + * spares entry; we want to ensure that it's always less + * than last_pgno. + */ + mbucket = (1 << i) - 1; + if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { + EPRINT((dbp->dbenv, + "Spares array entry %lu, page %lu is invalid", + i, pgno)); + isbad = 1; + } + } + +err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_vrfy -- + * Verify hash page. + * + * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__ham_vrfy(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + VRFY_PAGEINFO *pip; + u_int32_t ent, himark, inpend; + int isbad, ret, t_ret; + + isbad = 0; + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + /* Sanity check our flags and page type. */ + if ((ret = __db_fchk(dbp->dbenv, "__ham_vrfy", + flags, DB_AGGRESSIVE | DB_NOORDERCHK | DB_SALVAGE)) != 0) + goto err; + + if (TYPE(h) != P_HASH) { + TYPE_ERR_PRINT(dbp->dbenv, "__ham_vrfy", pgno, TYPE(h)); + DB_ASSERT(0); + ret = EINVAL; + goto err; + } + + /* Verify and save off fields common to all PAGEs. */ + if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* + * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower + * than the previous one, higher than the current end of the inp array, + * and lower than the page size. + * + * In any case, we return immediately if things are bad, as it would + * be unsafe to proceed. + */ + for (ent = 0, himark = dbp->pgsize, + inpend = (u_int8_t *)h->inp - (u_int8_t *)h; + ent < NUM_ENT(h); ent++) + if (h->inp[ent] >= himark) { + EPRINT((dbp->dbenv, + "Item %lu on page %lu out of order or nonsensical", + ent, pgno)); + isbad = 1; + goto err; + } else if (inpend >= himark) { + EPRINT((dbp->dbenv, + "inp array collided with data on page %lu", + pgno)); + isbad = 1; + goto err; + + } else { + himark = h->inp[ent]; + inpend += sizeof(db_indx_t); + if ((ret = __ham_vrfy_item( + dbp, vdp, pgno, h, ent, flags)) != 0) + goto err; + } + +err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_vrfy_item -- + * Given a hash page and an offset, sanity-check the item itself, + * and save off any overflow items or off-page dup children as necessary. 
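+ *	On-page duplicate sets (H_DUPLICATE) are walked as a series of
+ *	[len][data][len] entries, each length stored on both sides of its
+ *	datum; offpage items and offpage duplicate roots are recorded as
+ *	children for the later structure checks.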
+ */ +static int +__ham_vrfy_item(dbp, vdp, pgno, h, i, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + u_int32_t i, flags; +{ + HOFFPAGE hop; + HOFFDUP hod; + VRFY_CHILDINFO child; + VRFY_PAGEINFO *pip; + db_indx_t offset, len, dlen, elen; + int ret, t_ret; + u_int8_t *databuf; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + switch (HPAGE_TYPE(h, i)) { + case H_KEYDATA: + /* Nothing to do here--everything but the type field is data */ + break; + case H_DUPLICATE: + /* Are we a datum or a key? Better be the former. */ + if (i % 2 == 0) { + EPRINT((dbp->dbenv, + "Hash key stored as duplicate at page %lu item %lu", + pip->pgno, i)); + } + /* + * Dups are encoded as a series within a single HKEYDATA, + * in which each dup is surrounded by a copy of its length + * on either side (so that the series can be walked in either + * direction. We loop through this series and make sure + * each dup is reasonable. + * + * Note that at this point, we've verified item i-1, so + * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). + */ + len = LEN_HKEYDATA(h, dbp->pgsize, i); + databuf = HKEYDATA_DATA(P_ENTRY(h, i)); + for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { + memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); + + /* Make sure the length is plausible. */ + if (offset + DUP_SIZE(dlen) > len) { + EPRINT((dbp->dbenv, + "Duplicate item %lu, page %lu has bad length", + i, pip->pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + + /* + * Make sure the second copy of the length is the + * same as the first. + */ + memcpy(&elen, + databuf + offset + dlen + sizeof(db_indx_t), + sizeof(db_indx_t)); + if (elen != dlen) { + EPRINT((dbp->dbenv, + "Duplicate item %lu, page %lu has two different lengths", + i, pip->pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + } + F_SET(pip, VRFY_HAS_DUPS); + if (!LF_ISSET(DB_NOORDERCHK) && + __ham_dups_unsorted(dbp, databuf, len)) + F_SET(pip, VRFY_DUPS_UNSORTED); + break; + case H_OFFPAGE: + /* Offpage item. Make sure pgno is sane, save off. */ + memcpy(&hop, P_ENTRY(h, i), HOFFPAGE_SIZE); + if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || + hop.pgno == PGNO_INVALID) { + EPRINT((dbp->dbenv, + "Offpage item %lu, page %lu has bad page number", + i, pip->pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + memset(&child, 0, sizeof(VRFY_CHILDINFO)); + child.pgno = hop.pgno; + child.type = V_OVERFLOW; + child.tlen = hop.tlen; /* This will get checked later. */ + if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) + goto err; + break; + case H_OFFDUP: + /* Offpage duplicate item. Same drill. */ + memcpy(&hod, P_ENTRY(h, i), HOFFDUP_SIZE); + if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || + hod.pgno == PGNO_INVALID) { + EPRINT((dbp->dbenv, + "Offpage item %lu, page %lu has bad page number", + i, pip->pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + memset(&child, 0, sizeof(VRFY_CHILDINFO)); + child.pgno = hod.pgno; + child.type = V_DUPLICATE; + if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) + goto err; + F_SET(pip, VRFY_HAS_DUPS); + break; + default: + EPRINT((dbp->dbenv, + "Item %i, page %lu has bad type", i, pip->pgno)); + ret = DB_VERIFY_BAD; + break; + } + +err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __ham_vrfy_structure -- + * Verify the structure of a hash database. 
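+ *	Walks every bucket chain via __ham_vrfy_bucket, then confirms that
+ *	pages allocated beyond max_bucket (the rest of the current doubling,
+ *	or buckets created by since-aborted transactions) are hash pages
+ *	that are still empty.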
+ * + * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t meta_pgno; + u_int32_t flags; +{ + DB *pgset; + HMETA *m; + PAGE *h; + VRFY_PAGEINFO *pip; + int isbad, p, ret, t_ret; + db_pgno_t pgno; + u_int32_t bucket; + + ret = isbad = 0; + h = NULL; + pgset = vdp->pgset; + + if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, &p)) != 0) + return (ret); + if (p != 0) { + EPRINT((dbp->dbenv, + "Hash meta page %lu referenced twice", meta_pgno)); + return (DB_VERIFY_BAD); + } + if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0) + return (ret); + + /* Get the meta page; we'll need it frequently. */ + if ((ret = memp_fget(dbp->mpf, &meta_pgno, 0, &m)) != 0) + return (ret); + + /* Loop through bucket by bucket. */ + for (bucket = 0; bucket <= m->max_bucket; bucket++) + if ((ret = + __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* + * There may be unused hash pages corresponding to buckets + * that have been allocated but not yet used. These may be + * part of the current doubling above max_bucket, or they may + * correspond to buckets that were used in a transaction + * that then aborted. + * + * Loop through them, as far as the spares array defines them, + * and make sure they're all empty. + * + * Note that this should be safe, since we've already verified + * that the spares array is sane. + */ + for (bucket = m->max_bucket + 1; + m->spares[__db_log2(bucket + 1)] != 0; bucket++) { + pgno = BS_TO_PAGE(bucket, m->spares); + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + goto err; + + /* It's okay if these pages are totally zeroed; unmark it. */ + F_CLR(pip, VRFY_IS_ALLZEROES); + + if (pip->type != P_HASH) { + EPRINT((dbp->dbenv, + "Hash bucket %lu maps to non-hash page %lu", + bucket, pgno)); + isbad = 1; + } else if (pip->entries != 0) { + EPRINT((dbp->dbenv, + "Non-empty page %lu in unused hash bucket %lu", + pgno, bucket)); + isbad = 1; + } else { + if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0) + goto err; + if (p != 0) { + EPRINT((dbp->dbenv, + "Hash page %lu above max_bucket referenced", + pgno)); + isbad = 1; + } else { + if ((ret = + __db_vrfy_pgset_inc(pgset, pgno)) != 0) + goto err; + if ((ret = + __db_vrfy_putpageinfo(vdp, pip)) != 0) + goto err; + continue; + } + } + + /* If we got here, it's an error. */ + (void)__db_vrfy_putpageinfo(vdp, pip); + goto err; + } + +err: if ((t_ret = memp_fput(dbp->mpf, m, 0)) != 0) + return (t_ret); + if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (t_ret); + return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); +} + +/* + * __ham_vrfy_bucket -- + * Verify a given bucket. + */ +static int +__ham_vrfy_bucket(dbp, vdp, m, bucket, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *m; + u_int32_t bucket, flags; +{ + HASH *hashp; + VRFY_CHILDINFO *child; + VRFY_PAGEINFO *mip, *pip; + int ret, t_ret, isbad, p; + db_pgno_t pgno, next_pgno; + DBC *cc; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); + + isbad = 0; + pip = NULL; + cc = NULL; + + hashp = dbp->h_internal; + if (hashp != NULL && hashp->h_hash != NULL) + hfunc = hashp->h_hash; + else + hfunc = __ham_func5; + + if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0) + return (ret); + + /* Calculate the first pgno for this bucket. 
*/ + pgno = BS_TO_PAGE(bucket, m->spares); + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + goto err; + + /* Make sure we got a plausible page number. */ + if (pgno > vdp->last_pgno || pip->type != P_HASH) { + EPRINT((dbp->dbenv, "Bucket %lu has impossible first page %lu", + bucket, pgno)); + /* Unsafe to continue. */ + isbad = 1; + goto err; + } + + if (pip->prev_pgno != PGNO_INVALID) { + EPRINT((dbp->dbenv, + "First hash page %lu in bucket %lu has a prev_pgno", pgno)); + isbad = 1; + } + + /* + * Set flags for dups and sorted dups. + */ + flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? ST_DUPOK : 0; + flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? ST_DUPSORT : 0; + + /* Loop until we find a fatal bug, or until we run out of pages. */ + for (;;) { + /* Provide feedback on our progress to the application. */ + if (!LF_ISSET(DB_SALVAGE)) + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_pgset_get(vdp->pgset, pgno, &p)) != 0) + goto err; + if (p != 0) { + EPRINT((dbp->dbenv, + "Hash page %lu referenced twice", pgno)); + isbad = 1; + /* Unsafe to continue. */ + goto err; + } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, pgno)) != 0) + goto err; + + /* + * Hash pages that nothing has ever hashed to may never + * have actually come into existence, and may appear to be + * entirely zeroed. This is acceptable, and since there's + * no real way for us to know whether this has actually + * occurred, we clear the "wholly zeroed" flag on every + * hash page. A wholly zeroed page, by nature, will appear + * to have no flags set and zero entries, so should + * otherwise verify correctly. + */ + F_CLR(pip, VRFY_IS_ALLZEROES); + + /* If we have dups, our meta page had better know about it. */ + if (F_ISSET(pip, VRFY_HAS_DUPS) + && !F_ISSET(mip, VRFY_HAS_DUPS)) { + EPRINT((dbp->dbenv, + "Duplicates present in non-duplicate database, page %lu", + pgno)); + isbad = 1; + } + + /* + * If the database has sorted dups, this page had better + * not have unsorted ones. + */ + if (F_ISSET(mip, VRFY_HAS_DUPSORT) && + F_ISSET(pip, VRFY_DUPS_UNSORTED)) { + EPRINT((dbp->dbenv, + "Unsorted dups in sorted-dup database, page %lu", + pgno)); + isbad = 1; + } + + /* Walk overflow chains and offpage dup trees. */ + if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) + goto err; + for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0; + ret = __db_vrfy_ccnext(cc, &child)) + if (child->type == V_OVERFLOW) { + if ((ret = __db_vrfy_ovfl_structure(dbp, vdp, + child->pgno, child->tlen, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + } else if (child->type == V_DUPLICATE) { + if ((ret = __db_vrfy_duptype(dbp, + vdp, child->pgno, flags)) != 0) { + isbad = 1; + continue; + } + if ((ret = __bam_vrfy_subtree(dbp, vdp, + child->pgno, NULL, NULL, + flags | ST_RECNUM | ST_DUPSET, NULL, + NULL, NULL)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + } + if ((ret = __db_vrfy_ccclose(cc)) != 0) + goto err; + cc = NULL; + + /* If it's safe to check that things hash properly, do so. */ + if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) && + (ret = __ham_vrfy_hashing(dbp, pip->entries, + m, bucket, pgno, flags, hfunc)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + next_pgno = pip->next_pgno; + ret = __db_vrfy_putpageinfo(vdp, pip); + + pip = NULL; + if (ret != 0) + goto err; + + if (next_pgno == PGNO_INVALID) + break; /* End of the bucket. */ + + /* We already checked this, but just in case... 
*/ + if (!IS_VALID_PGNO(next_pgno)) { + DB_ASSERT(0); + EPRINT((dbp->dbenv, + "Hash page %lu has bad next_pgno", pgno)); + isbad = 1; + goto err; + } + + if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) + goto err; + + if (pip->prev_pgno != pgno) { + EPRINT((dbp->dbenv, "Hash page %lu has bad prev_pgno", + next_pgno)); + isbad = 1; + } + pgno = next_pgno; + } + +err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) + ret = t_ret; + if (mip != NULL && ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) && + ret == 0) + ret = t_ret; + if (pip != NULL && ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) && + ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_vrfy_hashing -- + * Verify that all items on a given hash page hash correctly. + * + * PUBLIC: int __ham_vrfy_hashing __P((DB *, + * PUBLIC: u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, + * PUBLIC: u_int32_t (*) __P((DB *, const void *, u_int32_t)))); + */ +int +__ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) + DB *dbp; + u_int32_t nentries; + HMETA *m; + u_int32_t thisbucket; + db_pgno_t pgno; + u_int32_t flags; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); +{ + DBT dbt; + PAGE *h; + db_indx_t i; + int ret, t_ret, isbad; + u_int32_t hval, bucket; + + ret = isbad = 0; + memset(&dbt, 0, sizeof(DBT)); + F_SET(&dbt, DB_DBT_REALLOC); + + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + return (ret); + + for (i = 0; i < nentries; i += 2) { + /* + * We've already verified the page integrity and that of any + * overflow chains linked off it; it is therefore safe to use + * __db_ret. It's also not all that much slower, since we have + * to copy every hash item to deal with alignment anyway; we + * can tweak this a bit if this proves to be a bottleneck, + * but for now, take the easy route. + */ + if ((ret = __db_ret(dbp, h, i, &dbt, NULL, NULL)) != 0) + goto err; + hval = hfunc(dbp, dbt.data, dbt.size); + + bucket = hval & m->high_mask; + if (bucket > m->max_bucket) + bucket = bucket & m->low_mask; + + if (bucket != thisbucket) { + EPRINT((dbp->dbenv, + "Item %lu on page %lu hashes incorrectly", + i, pgno)); + isbad = 1; + } + } + +err: if (dbt.data != NULL) + __os_free(dbt.data, 0); + if ((t_ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (t_ret); + + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_salvage -- + * Safely dump out anything that looks like a key on an alleged + * hash page. + * + * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, + * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBT dbt, unkdbt; + db_pgno_t dpgno; + int ret, err_ret, t_ret; + u_int32_t himark, tlen; + u_int8_t *hk; + void *buf; + u_int32_t dlen, len, i; + + memset(&dbt, 0, sizeof(DBT)); + dbt.flags = DB_DBT_REALLOC; + + memset(&unkdbt, 0, sizeof(DBT)); + unkdbt.size = strlen("UNKNOWN") + 1; + unkdbt.data = "UNKNOWN"; + + err_ret = 0; + + /* + * Allocate a buffer for overflow items. Start at one page; + * __db_safe_goff will realloc as needed. + */ + if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &buf)) != 0) + return (ret); + + himark = dbp->pgsize; + for (i = 0;; i++) { + /* If we're not aggressive, break when we hit NUM_ENT(h). 
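+		   Under DB_AGGRESSIVE we keep scanning past the item count,
+		   since NUM_ENT itself may not be trustworthy on a damaged
+		   page; __db_vrfy_inpitem returning DB_VERIFY_FATAL is what
+		   stops us then.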
*/ + if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) + break; + + /* Verify the current item. */ + ret = __db_vrfy_inpitem(dbp, + h, pgno, i, 0, flags, &himark, NULL); + /* If this returned a fatality, it's time to break. */ + if (ret == DB_VERIFY_FATAL) + break; + + if (ret == 0) { + hk = P_ENTRY(h, i); + len = LEN_HKEYDATA(h, dbp->pgsize, i); + if ((u_int32_t)(hk + len - (u_int8_t *)h) > + dbp->pgsize) { + /* + * Item is unsafely large; either continue + * or set it to the whole page, depending on + * aggressiveness. + */ + if (!LF_ISSET(DB_AGGRESSIVE)) + continue; + len = dbp->pgsize - + (u_int32_t)(hk - (u_int8_t *)h); + err_ret = DB_VERIFY_BAD; + } + switch (HPAGE_PTYPE(hk)) { + default: + if (!LF_ISSET(DB_AGGRESSIVE)) + break; + err_ret = DB_VERIFY_BAD; + /* FALLTHROUGH */ + case H_KEYDATA: +keydata: memcpy(buf, HKEYDATA_DATA(hk), len); + dbt.size = len; + dbt.data = buf; + if ((ret = __db_prdbt(&dbt, + 0, " ", handle, callback, 0, NULL)) != 0) + err_ret = ret; + break; + case H_OFFPAGE: + if (len < HOFFPAGE_SIZE) { + err_ret = DB_VERIFY_BAD; + continue; + } + memcpy(&dpgno, + HOFFPAGE_PGNO(hk), sizeof(dpgno)); + if ((ret = __db_safe_goff(dbp, vdp, + dpgno, &dbt, &buf, flags)) != 0) { + err_ret = ret; + (void)__db_prdbt(&unkdbt, 0, " ", + handle, callback, 0, NULL); + break; + } + if ((ret = __db_prdbt(&dbt, + 0, " ", handle, callback, 0, NULL)) != 0) + err_ret = ret; + break; + case H_OFFDUP: + if (len < HOFFPAGE_SIZE) { + err_ret = DB_VERIFY_BAD; + continue; + } + memcpy(&dpgno, + HOFFPAGE_PGNO(hk), sizeof(dpgno)); + /* UNKNOWN iff pgno is bad or we're a key. */ + if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { + if ((ret = __db_prdbt(&unkdbt, 0, " ", + handle, callback, 0, NULL)) != 0) + err_ret = ret; + } else if ((ret = __db_salvage_duptree(dbp, + vdp, dpgno, &dbt, handle, callback, + flags | SA_SKIPFIRSTKEY)) != 0) + err_ret = ret; + break; + case H_DUPLICATE: + /* + * We're a key; printing dups will seriously + * foul the output. If we're being aggressive, + * pretend this is a key and let the app. + * programmer sort out the mess. + */ + if (i % 2 == 0) { + err_ret = ret; + if (LF_ISSET(DB_AGGRESSIVE)) + goto keydata; + break; + } + + /* Too small to have any data. */ + if (len < + HKEYDATA_SIZE(2 * sizeof(db_indx_t))) { + err_ret = DB_VERIFY_BAD; + continue; + } + + /* Loop until we hit the total length. */ + for (tlen = 0; tlen + sizeof(db_indx_t) < len; + tlen += dlen) { + tlen += sizeof(db_indx_t); + memcpy(&dlen, hk, sizeof(db_indx_t)); + /* + * If dlen is too long, print all the + * rest of the dup set in a chunk. + */ + if (dlen + tlen > len) + dlen = len - tlen; + memcpy(buf, hk + tlen, dlen); + dbt.size = dlen; + dbt.data = buf; + if ((ret = __db_prdbt(&dbt, 0, " ", + handle, callback, 0, NULL)) != 0) + err_ret = ret; + tlen += sizeof(db_indx_t); + } + break; + } + } + } + + __os_free(buf, 0); + if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) + return (t_ret); + return ((ret == 0 && err_ret != 0) ? err_ret : ret); +} + +/* + * __ham_meta2pgset -- + * Return the set of hash pages corresponding to the given + * known-good meta page. + * + * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, + * PUBLIC: DB *)); + */ +int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *hmeta; + u_int32_t flags; + DB *pgset; +{ + PAGE *h; + db_pgno_t pgno; + u_int32_t bucket, totpgs; + int ret, val; + + /* + * We don't really need flags, but leave them for consistency with + * __bam_meta2pgset. 
+ */ + COMPQUIET(flags, 0); + + DB_ASSERT(pgset != NULL); + + totpgs = 0; + + /* + * Loop through all the buckets, pushing onto pgset the corresponding + * page(s) for each one. + */ + for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { + pgno = BS_TO_PAGE(bucket, hmeta->spares); + + /* + * We know the initial pgno is safe because the spares array has + * been verified. + * + * Safely walk the list of pages in this bucket. + */ + for (;;) { + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + return (ret); + if (TYPE(h) == P_HASH) { + + /* + * Make sure we don't go past the end of + * pgset. + */ + if (++totpgs > vdp->last_pgno) { + (void)memp_fput(dbp->mpf, h, 0); + return (DB_VERIFY_BAD); + } + if ((ret = + __db_vrfy_pgset_inc(pgset, pgno)) != 0) + return (ret); + + pgno = NEXT_PGNO(h); + } else + pgno = PGNO_INVALID; + + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + + /* If the new pgno is wonky, go onto the next bucket. */ + if (!IS_VALID_PGNO(pgno) || + pgno == PGNO_INVALID) + goto nextbucket; + + /* + * If we've touched this page before, we have a cycle; + * go on to the next bucket. + */ + if ((ret = __db_vrfy_pgset_get(pgset, pgno, &val)) != 0) + return (ret); + if (val != 0) + goto nextbucket; + } +nextbucket: ; + } + return (0); +} + +/* + * __ham_dups_unsorted -- + * Takes a known-safe hash duplicate set and its total length. + * Returns 1 if there are out-of-order duplicates in this set, + * 0 if there are not. + */ +static int +__ham_dups_unsorted(dbp, buf, len) + DB *dbp; + u_int8_t *buf; + u_int32_t len; +{ + DBT a, b; + db_indx_t offset, dlen; + int (*func) __P((DB *, const DBT *, const DBT *)); + + memset(&a, 0, sizeof(DBT)); + memset(&b, 0, sizeof(DBT)); + + func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; + + /* + * Loop through the dup set until we hit the end or we find + * a pair of dups that's out of order. b is always the current + * dup, a the one before it. + */ + for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { + memcpy(&dlen, buf + offset, sizeof(db_indx_t)); + b.data = buf + offset + sizeof(db_indx_t); + b.size = dlen; + + if (a.data != NULL && func(dbp, &a, &b) > 0) + return (1); + + a.data = b.data; + a.size = b.size; + } + + return (0); +} |
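The verification and upgrade code above leans on two small pieces of arithmetic that are easy to lose in the detail: __ham_vrfy_hashing reduces a hash value to a bucket with high_mask, falling back to low_mask when the result exceeds max_bucket, and BS_TO_PAGE turns a bucket into a page number using the per-doubling offsets in the spares array. The sketch below restates that arithmetic as a small standalone C program. The helper names, mask values and spares offsets are illustrative only and are not part of the imported Berkeley DB sources; log2_ceil stands in for __db_log2, whose ceiling behaviour is implied by the spares-conversion loop in __ham_30_hashmeta.

#include <stdio.h>
#include <stdint.h>

/* Smallest e with 2^e >= n, i.e. a ceiling log2, as the spares index needs. */
static uint32_t
log2_ceil(uint32_t n)
{
	uint32_t e;

	for (e = 0; ((uint32_t)1 << e) < n; e++)
		;
	return (e);
}

/* Mirror of the masking done in __ham_vrfy_hashing. */
static uint32_t
hval_to_bucket(uint32_t hval,
    uint32_t high_mask, uint32_t low_mask, uint32_t max_bucket)
{
	uint32_t bucket;

	bucket = hval & high_mask;
	if (bucket > max_bucket)
		bucket &= low_mask;
	return (bucket);
}

/* Bucket-to-page mapping in the spirit of BS_TO_PAGE. */
static uint32_t
bucket_to_page(uint32_t bucket, const uint32_t *spares)
{
	return (bucket + spares[log2_ceil(bucket + 1)]);
}

int
main(void)
{
	/* Hypothetical metadata: buckets 0..5 in use, masks for the doubling. */
	uint32_t max_bucket = 5, high_mask = 0x7, low_mask = 0x3;
	/* Hypothetical per-doubling offsets; index 0 covers bucket 0 only. */
	uint32_t spares[] = { 1, 1, 1, 1 };
	uint32_t bucket, hval;

	for (hval = 0; hval < 16; hval++) {
		bucket = hval_to_bucket(hval, high_mask, low_mask, max_bucket);
		printf("hval %2u -> bucket %u -> page %u\n",
		    (unsigned)hval, (unsigned)bucket,
		    (unsigned)bucket_to_page(bucket, spares));
	}
	return (0);
}

An item whose hash value maps to a bucket other than the one whose chain it sits on is exactly what __ham_vrfy_hashing reports as hashing incorrectly.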