summaryrefslogtreecommitdiff
path: root/bdb/hash/hash_rec.c
diff options
context:
space:
mode:
authorunknown <tim@threads.polyesthetic.msg>2001-03-04 19:42:05 -0500
committerunknown <tim@threads.polyesthetic.msg>2001-03-04 19:42:05 -0500
commitec6ae091617bdfdca9e65e8d3e65b950d234f676 (patch)
tree9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/hash/hash_rec.c
parent87d70fb598105b64b538ff6b81eef9da626255b1 (diff)
downloadmariadb-git-ec6ae091617bdfdca9e65e8d3e65b950d234f676.tar.gz
Import changeset
Diffstat (limited to 'bdb/hash/hash_rec.c')
-rw-r--r--bdb/hash/hash_rec.c1078
1 files changed, 1078 insertions, 0 deletions
diff --git a/bdb/hash/hash_rec.c b/bdb/hash/hash_rec.c
new file mode 100644
index 00000000000..ded58c281e9
--- /dev/null
+++ b/bdb/hash/hash_rec.c
@@ -0,0 +1,1078 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: hash_rec.c,v 11.34 2001/01/11 18:19:52 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "btree.h"
+#include "hash.h"
+#include "lock.h"
+#include "log.h"
+#include "mp.h"
+
+static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *));
+
+/*
+ * __ham_insdel_recover --
+ * Recovery function for hash key/data pair insert and delete records.
+ * The page LSN is compared against the record to decide whether the
+ * pair must be put (back) on the page or removed from it; the page LSN
+ * is then moved to match. On the success paths *lsnp is set to the
+ * record's prev_lsn.
+ *
+ * PUBLIC: int __ham_insdel_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_insdel_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_insdel_args *argp;
+ DB *file_dbp;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ PAGE *pagep;
+ u_int32_t opcode;
+ int cmp_n, cmp_p, flags, getmeta, ret, type;
+
+ COMPQUIET(info, NULL);
+
+ getmeta = 0;
+ REC_PRINT(__ham_insdel_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_insdel_read, 1);
+
+ if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ goto done;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ if ((ret = __ham_get_meta(dbc)) != 0) {
+ /*
+ * The page fetched above is still pinned; hand it back
+ * unmodified before bailing out so it is not leaked.
+ */
+ (void)memp_fput(mpf, pagep, 0);
+ goto out;
+ }
+ getmeta = 1;
+
+ /*
+ * cmp_n == 0: the page LSN equals this record's LSN (change is on
+ * the page). cmp_p == 0: the page LSN equals the LSN logged as the
+ * page's previous one (change is not on the page).
+ */
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
+ /*
+ * Two possible things going on:
+ * redo a delete/undo a put: delete the item from the page.
+ * redo a put/undo a delete: add the item to the page.
+ * If we are undoing a delete, then the information logged is the
+ * entire entry off the page, not just the data of a dbt. In
+ * this case, we want to copy it back onto the page verbatim.
+ * We do this by calling __putitem with the type H_OFFPAGE instead
+ * of H_KEYDATA.
+ */
+ opcode = OPCODE_OF(argp->opcode);
+
+ flags = 0;
+ if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) ||
+ (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) {
+ /*
+ * Need to redo a PUT or undo a delete. If we are undoing a
+ * delete, we've got to restore the item back to its original
+ * position. That's a royal pain in the butt (because we do
+ * not store item lengths on the page), but there's no choice.
+ */
+ if (opcode != DELPAIR ||
+ argp->ndx == (u_int32_t)NUM_ENT(pagep)) {
+ /* The pair goes at the end of the page: append it. */
+ __ham_putitem(pagep, &argp->key,
+ DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
+ H_OFFPAGE : H_KEYDATA);
+
+ if (PAIR_ISDATADUP(argp->opcode))
+ type = H_DUPLICATE;
+ else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode))
+ type = H_OFFPAGE;
+ else
+ type = H_KEYDATA;
+ __ham_putitem(pagep, &argp->data, type);
+ } else
+ /* Undoing a delete from the middle of the page. */
+ (void)__ham_reputpair(pagep, file_dbp->pgsize,
+ argp->ndx, &argp->key, &argp->data);
+
+ LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
+ flags = DB_MPOOL_DIRTY;
+
+ } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op))
+ || (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
+ /* Need to undo a put or redo a delete. */
+ __ham_dpair(file_dbp, pagep, argp->ndx);
+ LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+
+ /* flags is still 0 here when the page needed no change. */
+ if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ goto out;
+
+ /* Return the previous LSN. */
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+
+out: if (getmeta)
+ (void)__ham_release_meta(dbc);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_newpage_recover --
+ * This log message is used when we add/remove overflow pages. This
+ * message takes care of the pointer chains, not the data on the pages.
+ *
+ * Up to three pages are visited in turn: the page that was added or
+ * removed (new_pgno), its predecessor (prev_pgno) and its successor
+ * (next_pgno). Each one is compared against its own logged LSN and
+ * only changed when that comparison says the change is needed. On the
+ * success paths *lsnp is set to the record's prev_lsn.
+ *
+ * PUBLIC: int __ham_newpage_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_newpage_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_newpage_args *argp;
+ DB *file_dbp;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ PAGE *pagep;
+ int cmp_n, cmp_p, flags, getmeta, ret;
+
+ COMPQUIET(info, NULL);
+
+ getmeta = 0;
+ REC_PRINT(__ham_newpage_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_newpage_read, 1);
+
+ if ((ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ ret = 0;
+ goto ppage;
+ } else if ((ret = memp_fget(mpf, &argp->new_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ if ((ret = __ham_get_meta(dbc)) != 0) {
+ /*
+ * The page fetched above is still pinned; hand it back
+ * unmodified before bailing out so it is not leaked.
+ */
+ (void)memp_fput(mpf, pagep, 0);
+ goto out;
+ }
+ getmeta = 1;
+
+ /*
+ * There are potentially three pages we need to check: the one
+ * that we created/deleted, the one before it and the one after
+ * it.
+ */
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
+
+ flags = 0;
+ if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
+ (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
+ /* Redo a create new page or undo a delete new page. */
+ P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
+ argp->prev_pgno, argp->next_pgno, 0, P_HASH);
+ flags = DB_MPOOL_DIRTY;
+ } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
+ (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
+ /*
+ * Redo a delete or undo a create new page. All we
+ * really need to do is change the LSN.
+ */
+ flags = DB_MPOOL_DIRTY;
+ }
+
+ if (flags)
+ LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
+
+ if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ goto out;
+
+ /* Now do the prev page. */
+ppage: if (argp->prev_pgno != PGNO_INVALID) {
+ if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist.
+ * That is equivalent to having a pagelsn of 0,
+ * so we would not have to undo anything. In
+ * this case, don't bother creating a page.
+ */
+ ret = 0;
+ goto npage;
+ } else if ((ret =
+ memp_fget(mpf, &argp->prev_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->prevlsn);
+ flags = 0;
+
+ if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
+ (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
+ /* Redo a create new page or undo a delete new page. */
+ pagep->next_pgno = argp->new_pgno;
+ flags = DB_MPOOL_DIRTY;
+ } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
+ (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
+ /* Redo a delete or undo a create new page. */
+ pagep->next_pgno = argp->next_pgno;
+ flags = DB_MPOOL_DIRTY;
+ }
+
+ if (flags)
+ LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
+
+ if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ goto out;
+ }
+
+ /* Now time to do the next page */
+npage: if (argp->next_pgno != PGNO_INVALID) {
+ if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist.
+ * That is equivalent to having a pagelsn of 0,
+ * so we would not have to undo anything. In
+ * this case, don't bother creating a page.
+ */
+ goto done;
+ } else if ((ret =
+ memp_fget(mpf, &argp->next_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nextlsn);
+ flags = 0;
+
+ if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
+ (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
+ /* Redo a create new page or undo a delete new page. */
+ pagep->prev_pgno = argp->new_pgno;
+ flags = DB_MPOOL_DIRTY;
+ } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
+ (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
+ /* Redo a delete or undo a create new page. */
+ pagep->prev_pgno = argp->prev_pgno;
+ flags = DB_MPOOL_DIRTY;
+ }
+
+ if (flags)
+ LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
+
+ if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ goto out;
+ }
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+
+out: if (getmeta)
+ (void)__ham_release_meta(dbc);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_replace_recover --
+ * This log message refers to partial puts that are local to a single
+ * page. You can think of them as special cases of the more general
+ * insdel log message.
+ *
+ * Redo re-applies the logged new item, undo puts the logged old item
+ * back; in both cases the item at argp->ndx is rewritten at offset
+ * argp->off and the page LSN is moved to match. On the success paths
+ * *lsnp is set to the record's prev_lsn.
+ *
+ * PUBLIC: int __ham_replace_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_replace_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_replace_args *argp;
+ DB *file_dbp;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ DBT dbt;
+ PAGE *pagep;
+ int32_t grow;
+ int cmp_n, cmp_p, flags, getmeta, ret;
+ u_int8_t *hk;
+
+ COMPQUIET(info, NULL);
+
+ getmeta = 0;
+ REC_PRINT(__ham_replace_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_replace_read, 1);
+
+ if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ goto done;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ if ((ret = __ham_get_meta(dbc)) != 0) {
+ /*
+ * The page fetched above is still pinned; hand it back
+ * unmodified before bailing out so it is not leaked.
+ */
+ (void)memp_fput(mpf, pagep, 0);
+ goto out;
+ }
+ getmeta = 1;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
+
+ memset(&dbt, 0, sizeof(dbt));
+ flags = 0;
+ grow = 1;
+
+ if (cmp_p == 0 && DB_REDO(op)) {
+ /* Reapply the change as specified. */
+ dbt.data = argp->newitem.data;
+ dbt.size = argp->newitem.size;
+ grow = argp->newitem.size - argp->olditem.size;
+ LSN(pagep) = *lsnp;
+ flags = DB_MPOOL_DIRTY;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ /* Undo the already applied change. */
+ dbt.data = argp->olditem.data;
+ dbt.size = argp->olditem.size;
+ grow = argp->olditem.size - argp->newitem.size;
+ LSN(pagep) = argp->pagelsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+
+ if (flags) {
+ /* dbt and grow were set by whichever branch ran above. */
+ __ham_onpage_replace(pagep,
+ file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
+ if (argp->makedup) {
+ /*
+ * The logged change also switched the item's type:
+ * H_DUPLICATE going forward, H_KEYDATA going back.
+ */
+ hk = P_ENTRY(pagep, argp->ndx);
+ if (DB_REDO(op))
+ HPAGE_PTYPE(hk) = H_DUPLICATE;
+ else
+ HPAGE_PTYPE(hk) = H_KEYDATA;
+ }
+ }
+
+ if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+
+out: if (getmeta)
+ (void)__ham_release_meta(dbc);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_splitdata_recover --
+ * Recovery function for the page images logged around a bucket split
+ * (SPLITOLD and SPLITNEW records). Redo copies the SPLITNEW image onto
+ * the page; undo copies the SPLITOLD image back, or reinitializes the
+ * page as an empty hash page for a SPLITNEW record. The page LSN is
+ * updated in every case that applies. On the success paths *lsnp is
+ * set to the record's prev_lsn.
+ *
+ * PUBLIC: int __ham_splitdata_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_splitdata_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_splitdata_args *argp;
+ DB *file_dbp;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ PAGE *pagep;
+ int cmp_n, cmp_p, flags, getmeta, ret;
+
+ COMPQUIET(info, NULL);
+
+ getmeta = 0;
+ REC_PRINT(__ham_splitdata_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_splitdata_read, 1);
+
+ if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ goto done;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ if ((ret = __ham_get_meta(dbc)) != 0) {
+ /*
+ * The page fetched above is still pinned; hand it back
+ * unmodified before bailing out so it is not leaked.
+ */
+ (void)memp_fput(mpf, pagep, 0);
+ goto out;
+ }
+ getmeta = 1;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
+
+ /*
+ * There are two types of log messages here, one for the old page
+ * and one for the new pages created. The original image in the
+ * SPLITOLD record is used for undo. The image in the SPLITNEW
+ * is used for redo. We should never have a case where there is
+ * a redo operation and the SPLITOLD record is on disk, but not
+ * the SPLITNEW record. Therefore, we only have work to do when
+ * redo NEW messages and undo OLD messages, but we have to update
+ * LSNs in both cases.
+ */
+ flags = 0;
+ if (cmp_p == 0 && DB_REDO(op)) {
+ if (argp->opcode == SPLITNEW)
+ /* Need to redo the split described. */
+ memcpy(pagep, argp->pageimage.data,
+ argp->pageimage.size);
+ /* Set after the copy so the image cannot overwrite it. */
+ LSN(pagep) = *lsnp;
+ flags = DB_MPOOL_DIRTY;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ if (argp->opcode == SPLITOLD) {
+ /* Put back the old image. */
+ memcpy(pagep, argp->pageimage.data,
+ argp->pageimage.size);
+ } else
+ /* Undoing a new page: leave it empty and unlinked. */
+ P_INIT(pagep, file_dbp->pgsize, argp->pgno,
+ PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ LSN(pagep) = argp->pagelsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+ if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+
+out: if (getmeta)
+ (void)__ham_release_meta(dbc);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_copypage_recover --
+ * Recovery function for copypage.
+ *
+ * Three pages are fixed up in turn, each checked against its own
+ * logged LSN:
+ * pgno (the bucket page): redo copies the logged page image onto
+ * it; undo reinitializes it as an empty hash page that points
+ * at next_pgno.
+ * next_pgno: redo only moves the LSN forward; undo copies the
+ * logged page image back onto it.
+ * nnext_pgno (skipped when PGNO_INVALID): its PREV_PGNO is set to
+ * pgno on redo and back to next_pgno on undo.
+ * On the success paths *lsnp is set to the record's prev_lsn.
+ *
+ * PUBLIC: int __ham_copypage_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_copypage_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_copypage_args *argp;
+ DB *file_dbp;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ PAGE *pagep;
+ int cmp_n, cmp_p, flags, getmeta, ret;
+
+ COMPQUIET(info, NULL);
+
+ getmeta = 0;
+ REC_PRINT(__ham_copypage_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_copypage_read, 1);
+
+ /* The meta page is taken before any data page is fetched. */
+ if ((ret = __ham_get_meta(dbc)) != 0)
+ goto out;
+ getmeta = 1;
+ flags = 0;
+
+ /* This is the bucket page. */
+ if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ ret = 0;
+ goto donext;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
+
+ if (cmp_p == 0 && DB_REDO(op)) {
+ /* Need to redo update described. */
+ memcpy(pagep, argp->page.data, argp->page.size);
+ /*
+ * The copied image carries its own header fields; reset the
+ * page number, back pointer and LSN for this page.
+ */
+ PGNO(pagep) = argp->pgno;
+ PREV_PGNO(pagep) = PGNO_INVALID;
+ LSN(pagep) = *lsnp;
+ flags = DB_MPOOL_DIRTY;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ /* Need to undo update described. */
+ P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
+ argp->next_pgno, 0, P_HASH);
+ LSN(pagep) = argp->pagelsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+ if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ goto out;
+
+donext: /* Now fix up the "next" page. */
+ if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ ret = 0;
+ goto do_nn;
+ } else if ((ret = memp_fget(mpf, &argp->next_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ /* For REDO just update the LSN. For UNDO copy page back. */
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nextlsn);
+ flags = 0;
+ if (cmp_p == 0 && DB_REDO(op)) {
+ LSN(pagep) = *lsnp;
+ flags = DB_MPOOL_DIRTY;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ /* Need to undo update described. */
+ /*
+ * NOTE(review): the LSN is not assigned explicitly on this
+ * path; presumably the restored image already carries the
+ * page's earlier LSN -- confirm.
+ */
+ memcpy(pagep, argp->page.data, argp->page.size);
+ flags = DB_MPOOL_DIRTY;
+ }
+ if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ goto out;
+
+ /* Now fix up the next's next page. */
+do_nn: if (argp->nnext_pgno == PGNO_INVALID)
+ goto done;
+
+ if ((ret = memp_fget(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) {
+ if (DB_UNDO(op)) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ goto done;
+ } else if ((ret = memp_fget(mpf, &argp->nnext_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ }
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->nnextlsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nnextlsn);
+
+ flags = 0;
+ if (cmp_p == 0 && DB_REDO(op)) {
+ /* Need to redo update described. */
+ PREV_PGNO(pagep) = argp->pgno;
+ LSN(pagep) = *lsnp;
+ flags = DB_MPOOL_DIRTY;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ /* Need to undo update described. */
+ PREV_PGNO(pagep) = argp->next_pgno;
+ LSN(pagep) = argp->nnextlsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+ if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+
+out: if (getmeta)
+ (void)__ham_release_meta(dbc);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_metagroup_recover --
+ * Recovery function for metagroup.
+ *
+ * Works in two steps. First the last page of the logged allocation is
+ * fetched (created if necessary) and its LSN is set from the record.
+ * Then the hash meta-data page is updated: max_bucket is incremented
+ * on redo and decremented on undo, the low/high masks are adjusted
+ * when a whole group of pages was allocated, and a still-unset spares
+ * entry for the group is filled in. On the success paths *lsnp is set
+ * to the record's prev_lsn.
+ *
+ * PUBLIC: int __ham_metagroup_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_metagroup_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_metagroup_args *argp;
+ HASH_CURSOR *hcp;
+ DB *file_dbp;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ PAGE *pagep;
+ db_pgno_t last_pgno;
+ int cmp_n, cmp_p, flags, groupgrow, ret;
+
+ COMPQUIET(info, NULL);
+ REC_PRINT(__ham_metagroup_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_metagroup_read, 1);
+
+ /*
+ * This logs the virtual create of pages pgno to pgno + bucket
+ * Since the mpool page-allocation is not really able to be
+ * transaction protected, we can never undo it. Even in an abort,
+ * we have to allocate these pages to the hash table.
+ * The log record contains:
+ * bucket: new bucket being allocated.
+ * pgno: page number of the new bucket.
+ * if bucket is a power of 2, then we allocated a whole batch of
+ * pages; if it's not, then we simply allocated one new page.
+ */
+ /*
+ * NOTE(review): the test below is made on argp->bucket + 1, not on
+ * argp->bucket as the comment above words it -- confirm which of
+ * the two the comment means.
+ */
+ groupgrow =
+ (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
+
+ last_pgno = argp->pgno;
+ if (groupgrow)
+ /* Read the last page. */
+ last_pgno += argp->bucket;
+
+ /* Unlike the other recovery functions, always create the page. */
+ if ((ret = memp_fget(mpf, &last_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
+
+ flags = 0;
+ if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) {
+ /*
+ * We need to make sure that we redo the allocation of the
+ * pages.
+ */
+ if (DB_REDO(op))
+ pagep->lsn = *lsnp;
+ else
+ pagep->lsn = argp->pagelsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+ if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ goto out;
+
+ /* Now we have to update the meta-data page. */
+ hcp = (HASH_CURSOR *)dbc->internal;
+ if ((ret = __ham_get_meta(dbc)) != 0)
+ goto out;
+ /* From here on the comparisons are against the meta page's LSN. */
+ cmp_n = log_compare(lsnp, &hcp->hdr->dbmeta.lsn);
+ cmp_p = log_compare(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
+ CHECK_LSN(op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
+ if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) {
+ if (DB_REDO(op)) {
+ /* Redo the actual updating of bucket counts. */
+ ++hcp->hdr->max_bucket;
+ if (groupgrow) {
+ hcp->hdr->low_mask = hcp->hdr->high_mask;
+ hcp->hdr->high_mask =
+ (argp->bucket + 1) | hcp->hdr->low_mask;
+ }
+ hcp->hdr->dbmeta.lsn = *lsnp;
+ } else {
+ /* Undo the actual updating of bucket counts. */
+ --hcp->hdr->max_bucket;
+ if (groupgrow) {
+ hcp->hdr->high_mask = hcp->hdr->low_mask;
+ hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
+ }
+ hcp->hdr->dbmeta.lsn = argp->metalsn;
+ }
+ /*
+ * Done for redo and undo alike: record where the group
+ * starts if the spares slot has not been filled in yet.
+ */
+ if (groupgrow &&
+ hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] ==
+ PGNO_INVALID)
+ hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
+ argp->pgno - argp->bucket - 1;
+ F_SET(hcp, H_DIRTY);
+ }
+ if ((ret = __ham_release_meta(dbc)) != 0)
+ goto out;
+
+ /*
+ * Nothing in this function jumps to "done"; presumably the label
+ * is a target of REC_INTRO -- confirm against the macro.
+ */
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+
+out: REC_CLOSE;
+}
+
+/*
+ * __ham_groupalloc_recover --
+ * Recover the batch creation of a set of pages for a new database.
+ *
+ * Redo makes sure the pages exist and are initialized (through
+ * __ham_alloc_pages); undo hands the page range to __db_add_limbo.
+ * In both directions the LSN of the meta page at PGNO_BASE_MD is
+ * updated when the LSN comparison says so. *lsnp is set to the
+ * record's prev_lsn only when no step failed; otherwise the first
+ * error is what ret carries into REC_CLOSE.
+ *
+ * PUBLIC: int __ham_groupalloc_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_groupalloc_args *argp;
+ DBMETA *mmeta;
+ DB_MPOOLFILE *mpf;
+ DB *file_dbp;
+ DBC *dbc;
+ db_pgno_t pgno;
+ int cmp_n, cmp_p, flags, ret, t_ret;
+
+ REC_PRINT(__ham_groupalloc_print);
+ /*
+ * REC_INTRO is defined elsewhere; presumably it decodes the log
+ * record into argp and sets file_dbp, dbc and mpf, since none of
+ * them is assigned in this function.
+ */
+ REC_INTRO(__ham_groupalloc_read, 0);
+
+ pgno = PGNO_BASE_MD;
+ if ((ret = memp_fget(mpf, &pgno, 0, &mmeta)) != 0) {
+ if (DB_REDO(op)) {
+ /* Page should have existed. */
+ (void)__db_pgerr(file_dbp, pgno);
+ goto out;
+ } else {
+ ret = 0;
+ goto done;
+ }
+ }
+
+ cmp_n = log_compare(lsnp, &LSN(mmeta));
+ cmp_p = log_compare(&LSN(mmeta), &argp->meta_lsn);
+ CHECK_LSN(op, cmp_p, &LSN(mmeta), &argp->meta_lsn);
+
+ /*
+ * Basically, we used mpool to allocate a chunk of pages.
+ * We need to either add those to a free list (in the undo
+ * case) or initialize them (in the redo case).
+ *
+ * If we are redoing and this is a hash subdatabase, it's possible
+ * that the pages were never allocated, so we'd better check for
+ * that and handle it here.
+ */
+
+ flags = 0;
+ if (DB_REDO(op)) {
+ if ((ret = __ham_alloc_pages(file_dbp, argp)) != 0)
+ goto out1;
+ if (cmp_p == 0) {
+ LSN(mmeta) = *lsnp;
+ flags = DB_MPOOL_DIRTY;
+ }
+ }
+
+ /*
+ * Always put the pages into the limbo list and free them later.
+ */
+ else if (DB_UNDO(op)) {
+ /*
+ * On failure go through out1 as well, so the meta page
+ * fetched above is released rather than left pinned.
+ */
+ if ((ret = __db_add_limbo(dbenv,
+ info, argp->fileid, argp->start_pgno, argp->num)) != 0)
+ goto out1;
+ if (cmp_n == 0) {
+ LSN(mmeta) = argp->meta_lsn;
+ flags = DB_MPOOL_DIRTY;
+ }
+ }
+
+ /*
+ * Release the meta page on every path that fetched it. Keep the
+ * first error: a successful memp_fput must not hide an earlier
+ * failure from __ham_alloc_pages or __db_add_limbo.
+ */
+out1: if ((t_ret = memp_fput(mpf, mmeta, flags)) != 0 && ret == 0)
+ ret = t_ret;
+ if (ret != 0)
+ goto out;
+
+done: if (ret == 0)
+ *lsnp = argp->prev_lsn;
+
+out: REC_CLOSE;
+}
+
+/*
+ * __ham_alloc_pages --
+ *
+ * Helper for the redo side of __ham_groupalloc_recover. Makes sure the
+ * last page of the logged allocation (start_pgno + num - 1) exists and
+ * is an initialized hash page.
+ *
+ * A page that can be read is left alone unless it is still P_INVALID
+ * with a zero LSN. If the page cannot be read, the whole range is first
+ * initialized through __os_fpinit and the last page is then created.
+ * Returns 0 on success, otherwise the error of the call that failed.
+ */
+static int
+__ham_alloc_pages(dbp, argp)
+ DB *dbp;
+ __ham_groupalloc_args *argp;
+{
+ DB_MPOOLFILE *mpf;
+ PAGE *pagep;
+ db_pgno_t pgno;
+ int ret;
+
+ mpf = dbp->mpf;
+
+ /* Only the final page of the group is looked at. */
+ pgno = argp->start_pgno + argp->num - 1;
+
+ ret = memp_fget(mpf, &pgno, 0, &pagep);
+ if (ret == 0) {
+ /* Present and already set up: hand it back untouched. */
+ if (!(pagep->type == P_INVALID && IS_ZERO_LSN(pagep->lsn)))
+ return (memp_fput(mpf, pagep, 0));
+ } else {
+ /*
+ * The page has to be created. Pages that were never
+ * written may hold random garbage on some systems (the
+ * original author names Windows), so let the os layer
+ * initialize the whole group before the last page is
+ * fetched with DB_MPOOL_CREATE.
+ */
+ ret = __os_fpinit(dbp->dbenv, &mpf->fh,
+ argp->start_pgno, argp->num, dbp->pgsize);
+ if (ret != 0)
+ return (ret);
+
+ ret = memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep);
+ if (ret != 0) {
+ (void)__db_pgerr(dbp, pgno);
+ return (ret);
+ }
+ }
+
+ /* Set the page up as an empty, unlinked hash page with a zero LSN. */
+ P_INIT(pagep,
+ dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ ZERO_LSN(pagep->lsn);
+
+ return (memp_fput(mpf, pagep, DB_MPOOL_DIRTY));
+}
+
+/*
+ * __ham_curadj_recover --
+ * Undo cursor adjustments if a subtransaction fails.
+ *
+ * Real work is only done for DB_TXN_ABORT; for every other operation
+ * the record is read and *lsnp is moved to its prev_lsn. On abort the
+ * recovery cursor is made to look like the cursor that performed the
+ * logged adjustment and __ham_c_update is called with the add flag
+ * inverted.
+ *
+ * PUBLIC: int __ham_curadj_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+
+int
+__ham_curadj_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_curadj_args *argp;
+ DB_MPOOLFILE *mpf;
+ DB *file_dbp;
+ DBC *dbc;
+ int ret;
+ HASH_CURSOR *hcp;
+
+ REC_PRINT(__ham_curadj_print);
+
+ /*
+ * The record must be read before anything can jump to "done":
+ * that label dereferences argp, which is not assigned anywhere in
+ * this function and so presumably only becomes valid through
+ * REC_INTRO (defined elsewhere, along with file_dbp, dbc and mpf).
+ */
+ REC_INTRO(__ham_curadj_read, 0);
+
+ COMPQUIET(info, NULL);
+
+ /* Cursor adjustments only have to be reversed on abort. */
+ if (op != DB_TXN_ABORT)
+ goto done;
+
+ /*
+ * Undo the adjustment by reinitializing the cursor to look like
+ * the one that was used to do the adjustment, then invert the
+ * add flag so that the update undoes the adjustment.
+ */
+ hcp = (HASH_CURSOR *)dbc->internal;
+ hcp->pgno = argp->pgno;
+ hcp->indx = argp->indx;
+ hcp->dup_off = argp->dup_off;
+ hcp->order = argp->order;
+ if (!argp->add)
+ F_SET(hcp, H_DELETED);
+ (void)__ham_c_update(dbc, argp->len, !argp->add, argp->is_dup);
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+out: REC_CLOSE;
+}
+
+/*
+ * __ham_chgpg_recover --
+ * Undo cursor adjustments if a subtransaction fails.
+ *
+ * Real work is only done for DB_TXN_ABORT; for every other operation
+ * the record is read and *lsnp is moved to its prev_lsn. On abort,
+ * every active cursor of every DB handle that shares this file's
+ * adj_fileid is visited and moved back according to argp->mode:
+ * DB_HAM_CHGPG: a cursor on new_pgno goes back to old_pgno, and to
+ * old_indx when its index matches new_indx.
+ * DB_HAM_SPLIT: a cursor at (new_pgno, new_indx) goes back to
+ * (old_pgno, old_indx).
+ * DB_HAM_DUP: an off-page duplicate cursor positioned at
+ * (new_pgno, new_indx) is closed and detached.
+ *
+ * PUBLIC: int __ham_chgpg_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+
+int
+__ham_chgpg_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ __ham_chgpg_args *argp;
+ BTREE_CURSOR *opdcp;
+ DB_MPOOLFILE *mpf;
+ DB *file_dbp, *ldbp;
+ DBC *dbc;
+ int ret;
+ DBC *cp;
+ HASH_CURSOR *lcp;
+
+ REC_PRINT(__ham_chgpg_print);
+
+ /*
+ * The record must be read before any exit path is taken: argp and
+ * dbc are not assigned anywhere in this function, so presumably
+ * they only become valid through REC_INTRO (defined elsewhere),
+ * and the exit labels below rely on them.
+ */
+ REC_INTRO(__ham_chgpg_read, 0);
+
+ COMPQUIET(info, NULL);
+
+ /* Cursor adjustments only have to be reversed on abort. */
+ if (op != DB_TXN_ABORT)
+ goto done;
+
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, file_dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == file_dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, file_dbp->mutexp);
+
+ for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
+ cp = TAILQ_NEXT(cp, links)) {
+ lcp = (HASH_CURSOR *)cp->internal;
+
+ switch (argp->mode) {
+ case DB_HAM_CHGPG:
+ if (lcp->pgno != argp->new_pgno)
+ break;
+
+ if (argp->old_indx == NDX_INVALID)
+ lcp->pgno = argp->old_pgno;
+ else if (lcp->indx == argp->new_indx) {
+ lcp->indx = argp->old_indx;
+ lcp->pgno = argp->old_pgno;
+ }
+ break;
+
+ case DB_HAM_SPLIT:
+ if (lcp->pgno == argp->new_pgno
+ && lcp->indx == argp->new_indx) {
+ lcp->indx = argp->old_indx;
+ lcp->pgno = argp->old_pgno;
+ }
+ break;
+
+ case DB_HAM_DUP:
+ if (lcp->opd != NULL) {
+ opdcp =
+ (BTREE_CURSOR *)lcp->opd->internal;
+ if (opdcp->pgno == argp->new_pgno &&
+ opdcp->indx == argp->new_indx) {
+ if (F_ISSET(opdcp, C_DELETED))
+ F_SET(lcp, H_DELETED);
+ if ((ret =
+ lcp->opd->c_close(
+ lcp->opd)) != 0) {
+ /*
+ * Drop both mutexes taken
+ * above before leaving;
+ * "out" does not do it.
+ */
+ MUTEX_THREAD_UNLOCK(dbenv,
+ file_dbp->mutexp);
+ MUTEX_THREAD_UNLOCK(dbenv,
+ dbenv->dblist_mutexp);
+ goto out;
+ }
+ lcp->opd = NULL;
+ }
+ }
+ break;
+ }
+ }
+
+ MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
+out: REC_CLOSE;
+}