summaryrefslogtreecommitdiff
path: root/bdb/db/db_dispatch.c
diff options
context:
space:
mode:
authorunknown <tim@threads.polyesthetic.msg>2001-03-04 19:42:05 -0500
committerunknown <tim@threads.polyesthetic.msg>2001-03-04 19:42:05 -0500
commitec6ae091617bdfdca9e65e8d3e65b950d234f676 (patch)
tree9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/db/db_dispatch.c
parent87d70fb598105b64b538ff6b81eef9da626255b1 (diff)
downloadmariadb-git-ec6ae091617bdfdca9e65e8d3e65b950d234f676.tar.gz
Import changeset
Diffstat (limited to 'bdb/db/db_dispatch.c')
-rw-r--r--bdb/db/db_dispatch.c983
1 files changed, 983 insertions, 0 deletions
diff --git a/bdb/db/db_dispatch.c b/bdb/db/db_dispatch.c
new file mode 100644
index 00000000000..c9beac401a7
--- /dev/null
+++ b/bdb/db/db_dispatch.c
@@ -0,0 +1,983 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: db_dispatch.c,v 11.41 2001/01/11 18:19:50 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "log_auto.h"
+#include "txn.h"
+#include "txn_auto.h"
+#include "log.h"
+
+static int __db_txnlist_find_internal __P((void *, db_txnlist_type,
+ u_int32_t, u_int8_t [DB_FILE_ID_LEN], DB_TXNLIST **, int));
+
+/*
+ * __db_dispatch --
+ *
+ * This is the transaction dispatch function used by the db access methods.
+ * It is designed to handle the record format used by all the access
+ * methods (the one automatically generated by the db_{h,log,read}.sh
+ * scripts in the tools directory). An application using a different
+ * recovery paradigm will supply a different dispatch function to txn_open.
+ *
+ * PUBLIC: int __db_dispatch __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__db_dispatch(dbenv, db, lsnp, redo, info)
+ DB_ENV *dbenv; /* The environment. */
+ DBT *db; /* The log record upon which to dispatch. */
+ DB_LSN *lsnp; /* The lsn of the record being dispatched. */
+ db_recops redo; /* Redo this op (or undo it). */
+ void *info;
+{
+ u_int32_t rectype, txnid;
+ int make_call, ret;
+
+ memcpy(&rectype, db->data, sizeof(rectype));
+ memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
+ make_call = ret = 0;
+
+ /*
+ * If we find a record that is in the user's number space and they
+ * have specified a recovery routine, let them handle it. If they
+ * didn't specify a recovery routine, then we expect that they've
+ * followed all our rules and registered new recovery functions.
+ */
+ switch (redo) {
+ case DB_TXN_ABORT:
+ /*
+ * XXX
+ * db_printlog depends on DB_TXN_ABORT not examining the TXN
+ * list. If that ever changes, fix db_printlog too.
+ */
+ make_call = 1;
+ break;
+ case DB_TXN_OPENFILES:
+ if (rectype == DB_log_register)
+ return (dbenv->dtab[rectype](dbenv,
+ db, lsnp, redo, info));
+ break;
+ case DB_TXN_BACKWARD_ROLL:
+ /*
+ * Running full recovery in the backward pass. If we've
+ * seen this txnid before and added to it our commit list,
+ * then we do nothing during this pass, unless this is a child
+ * commit record, in which case we need to process it. If
+ * we've never seen it, then we call the appropriate recovery
+ * routine.
+ *
+ * We need to always undo DB_db_noop records, so that we
+ * properly handle any aborts before the file was closed.
+ */
+ if (rectype == DB_log_register ||
+ rectype == DB_txn_ckp || rectype == DB_db_noop
+ || rectype == DB_txn_child || (txnid != 0 &&
+ (ret = __db_txnlist_find(info, txnid)) != 0)) {
+ make_call = 1;
+ if (ret == DB_NOTFOUND && rectype != DB_txn_regop &&
+ rectype != DB_txn_xa_regop && (ret =
+ __db_txnlist_add(dbenv, info, txnid, 1)) != 0)
+ return (ret);
+ }
+ break;
+ case DB_TXN_FORWARD_ROLL:
+ /*
+ * In the forward pass, if we haven't seen the transaction,
+ * do nothing, else recovery it.
+ *
+ * We need to always redo DB_db_noop records, so that we
+ * properly handle any commits after the file was closed.
+ */
+ if (rectype == DB_log_register ||
+ rectype == DB_txn_ckp ||
+ rectype == DB_db_noop ||
+ __db_txnlist_find(info, txnid) == 0)
+ make_call = 1;
+ break;
+ default:
+ return (__db_unknown_flag(dbenv, "__db_dispatch", redo));
+ }
+
+ if (make_call) {
+ if (rectype >= DB_user_BEGIN && dbenv->tx_recover != NULL)
+ return (dbenv->tx_recover(dbenv, db, lsnp, redo));
+ else
+ return (dbenv->dtab[rectype](dbenv, db, lsnp, redo, info));
+ }
+
+ return (0);
+}
+
+/*
+ * __db_add_recovery --
+ *
+ * PUBLIC: int __db_add_recovery __P((DB_ENV *,
+ * PUBLIC: int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t));
+ */
+int
+__db_add_recovery(dbenv, func, ndx)
+ DB_ENV *dbenv;
+ int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ u_int32_t ndx;
+{
+ u_int32_t i, nsize;
+ int ret;
+
+ /* Check if we have to grow the table. */
+ if (ndx >= dbenv->dtab_size) {
+ nsize = ndx + 40;
+ if ((ret = __os_realloc(dbenv,
+ nsize * sizeof(dbenv->dtab[0]), NULL, &dbenv->dtab)) != 0)
+ return (ret);
+ for (i = dbenv->dtab_size; i < nsize; ++i)
+ dbenv->dtab[i] = NULL;
+ dbenv->dtab_size = nsize;
+ }
+
+ dbenv->dtab[ndx] = func;
+ return (0);
+}
+
+/*
+ * __deprecated_recover --
+ * Stub routine for deprecated recovery functions.
+ *
+ * PUBLIC: int __deprecated_recover
+ * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__deprecated_recover(dbenv, dbtp, lsnp, op, info)
+ DB_ENV *dbenv;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops op;
+ void *info;
+{
+ COMPQUIET(dbenv, NULL);
+ COMPQUIET(dbtp, NULL);
+ COMPQUIET(lsnp, NULL);
+ COMPQUIET(op, 0);
+ COMPQUIET(info, NULL);
+ return (EINVAL);
+}
+
+/*
+ * __db_txnlist_init --
+ * Initialize transaction linked list.
+ *
+ * PUBLIC: int __db_txnlist_init __P((DB_ENV *, void *));
+ */
+int
+__db_txnlist_init(dbenv, retp)
+ DB_ENV *dbenv;
+ void *retp;
+{
+ DB_TXNHEAD *headp;
+ int ret;
+
+ if ((ret = __os_malloc(dbenv, sizeof(DB_TXNHEAD), NULL, &headp)) != 0)
+ return (ret);
+
+ LIST_INIT(&headp->head);
+ headp->maxid = 0;
+ headp->generation = 1;
+
+ *(void **)retp = headp;
+ return (0);
+}
+
+/*
+ * __db_txnlist_add --
+ * Add an element to our transaction linked list.
+ *
+ * PUBLIC: int __db_txnlist_add __P((DB_ENV *, void *, u_int32_t, int32_t));
+ */
+int
+__db_txnlist_add(dbenv, listp, txnid, aborted)
+ DB_ENV *dbenv;
+ void *listp;
+ u_int32_t txnid;
+ int32_t aborted;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *elp;
+ int ret;
+
+ if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), NULL, &elp)) != 0)
+ return (ret);
+
+ hp = (DB_TXNHEAD *)listp;
+ LIST_INSERT_HEAD(&hp->head, elp, links);
+
+ elp->type = TXNLIST_TXNID;
+ elp->u.t.txnid = txnid;
+ elp->u.t.aborted = aborted;
+ if (txnid > hp->maxid)
+ hp->maxid = txnid;
+ elp->u.t.generation = hp->generation;
+
+ return (0);
+}
+/*
+ * __db_txnlist_remove --
+ * Remove an element from our transaction linked list.
+ *
+ * PUBLIC: int __db_txnlist_remove __P((void *, u_int32_t));
+ */
+int
+__db_txnlist_remove(listp, txnid)
+ void *listp;
+ u_int32_t txnid;
+{
+ DB_TXNLIST *entry;
+
+ return (__db_txnlist_find_internal(listp,
+ TXNLIST_TXNID, txnid, NULL, &entry, 1));
+}
+
+/* __db_txnlist_close --
+ *
+ * Call this when we close a file. It allows us to reconcile whether
+ * we have done any operations on this file with whether the file appears
+ * to have been deleted. If you never do any operations on a file, then
+ * we assume it's OK to appear deleted.
+ *
+ * PUBLIC: int __db_txnlist_close __P((void *, int32_t, u_int32_t));
+ */
+
+int
+__db_txnlist_close(listp, lid, count)
+ void *listp;
+ int32_t lid;
+ u_int32_t count;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *p;
+
+ hp = (DB_TXNHEAD *)listp;
+ for (p = LIST_FIRST(&hp->head); p != NULL; p = LIST_NEXT(p, links)) {
+ if (p->type == TXNLIST_DELETE)
+ if (lid == p->u.d.fileid &&
+ !F_ISSET(&p->u.d, TXNLIST_FLAG_CLOSED)) {
+ p->u.d.count += count;
+ return (0);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * __db_txnlist_delete --
+ *
+ * Record that a file was missing or deleted. If the deleted
+ * flag is set, then we've encountered a delete of a file, else we've
+ * just encountered a file that is missing. The lid is the log fileid
+ * and is only meaningful if deleted is not equal to 0.
+ *
+ * PUBLIC: int __db_txnlist_delete __P((DB_ENV *,
+ * PUBLIC: void *, char *, u_int32_t, int));
+ */
+int
+__db_txnlist_delete(dbenv, listp, name, lid, deleted)
+ DB_ENV *dbenv;
+ void *listp;
+ char *name;
+ u_int32_t lid;
+ int deleted;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *p;
+ int ret;
+
+ hp = (DB_TXNHEAD *)listp;
+ for (p = LIST_FIRST(&hp->head); p != NULL; p = LIST_NEXT(p, links)) {
+ if (p->type == TXNLIST_DELETE)
+ if (strcmp(name, p->u.d.fname) == 0) {
+ if (deleted)
+ F_SET(&p->u.d, TXNLIST_FLAG_DELETED);
+ else
+ F_CLR(&p->u.d, TXNLIST_FLAG_CLOSED);
+ return (0);
+ }
+ }
+
+ /* Need to add it. */
+ if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), NULL, &p)) != 0)
+ return (ret);
+ LIST_INSERT_HEAD(&hp->head, p, links);
+
+ p->type = TXNLIST_DELETE;
+ p->u.d.flags = 0;
+ if (deleted)
+ F_SET(&p->u.d, TXNLIST_FLAG_DELETED);
+ p->u.d.fileid = lid;
+ p->u.d.count = 0;
+ ret = __os_strdup(dbenv, name, &p->u.d.fname);
+
+ return (ret);
+}
+
+/*
+ * __db_txnlist_end --
+ * Discard transaction linked list. Print out any error messages
+ * for deleted files.
+ *
+ * PUBLIC: void __db_txnlist_end __P((DB_ENV *, void *));
+ */
+void
+__db_txnlist_end(dbenv, listp)
+ DB_ENV *dbenv;
+ void *listp;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *p;
+ DB_LOG *lp;
+
+ hp = (DB_TXNHEAD *)listp;
+ lp = (DB_LOG *)dbenv->lg_handle;
+ while (hp != NULL && (p = LIST_FIRST(&hp->head)) != NULL) {
+ LIST_REMOVE(p, links);
+ switch (p->type) {
+ case TXNLIST_DELETE:
+ /*
+ * If we have a file that is not deleted and has
+ * some operations, we flag the warning. Since
+ * the file could still be open, we need to check
+ * the actual log table as well.
+ */
+ if ((!F_ISSET(&p->u.d, TXNLIST_FLAG_DELETED) &&
+ p->u.d.count != 0) ||
+ (!F_ISSET(&p->u.d, TXNLIST_FLAG_CLOSED) &&
+ p->u.d.fileid != (int32_t) TXNLIST_INVALID_ID &&
+ p->u.d.fileid < lp->dbentry_cnt &&
+ lp->dbentry[p->u.d.fileid].count != 0))
+ __db_err(dbenv, "warning: %s: %s",
+ p->u.d.fname, db_strerror(ENOENT));
+ __os_freestr(p->u.d.fname);
+ break;
+ case TXNLIST_LSN:
+ __os_free(p->u.l.lsn_array,
+ p->u.l.maxn * sizeof(DB_LSN));
+ break;
+ default:
+ /* Possibly an incomplete DB_TXNLIST; just free it. */
+ break;
+ }
+ __os_free(p, sizeof(DB_TXNLIST));
+ }
+ __os_free(listp, sizeof(DB_TXNHEAD));
+}
+
+/*
+ * __db_txnlist_find --
+ * Checks to see if a txnid with the current generation is in the
+ * txnid list. This returns DB_NOTFOUND if the item isn't in the
+ * list otherwise it returns (like __db_txnlist_find_internal) a
+ * 1 or 0 indicating if the transaction is aborted or not. A txnid
+ * of 0 means the record was generated while not in a transaction.
+ *
+ * PUBLIC: int __db_txnlist_find __P((void *, u_int32_t));
+ */
+int
+__db_txnlist_find(listp, txnid)
+ void *listp;
+ u_int32_t txnid;
+{
+ DB_TXNLIST *entry;
+
+ if (txnid == 0)
+ return (DB_NOTFOUND);
+ return (__db_txnlist_find_internal(listp,
+ TXNLIST_TXNID, txnid, NULL, &entry, 0));
+}
+
+/*
+ * __db_txnlist_find_internal --
+ * Find an entry on the transaction list.
+ * If the entry is not there or the list pointeris not initialized
+ * we return DB_NOTFOUND. If the item is found, we return the aborted
+ * status (1 for aborted, 0 for not aborted). Currently we always call
+ * this with an initialized list pointer but checking for NULL keeps it general.
+ */
+static int
+__db_txnlist_find_internal(listp, type, txnid, uid, txnlistp, delete)
+ void *listp;
+ db_txnlist_type type;
+ u_int32_t txnid;
+ u_int8_t uid[DB_FILE_ID_LEN];
+ DB_TXNLIST **txnlistp;
+ int delete;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *p;
+ int ret;
+
+ if ((hp = (DB_TXNHEAD *)listp) == NULL)
+ return (DB_NOTFOUND);
+
+ for (p = LIST_FIRST(&hp->head); p != NULL; p = LIST_NEXT(p, links)) {
+ if (p->type != type)
+ continue;
+ switch (type) {
+ case TXNLIST_TXNID:
+ if (p->u.t.txnid != txnid ||
+ hp->generation != p->u.t.generation)
+ continue;
+ ret = p->u.t.aborted;
+ break;
+
+ case TXNLIST_PGNO:
+ if (memcmp(uid, p->u.p.uid, DB_FILE_ID_LEN) != 0)
+ continue;
+
+ ret = 0;
+ break;
+ default:
+ DB_ASSERT(0);
+ ret = EINVAL;
+ }
+ if (delete == 1) {
+ LIST_REMOVE(p, links);
+ __os_free(p, sizeof(DB_TXNLIST));
+ } else if (p != LIST_FIRST(&hp->head)) {
+ /* Move it to head of list. */
+ LIST_REMOVE(p, links);
+ LIST_INSERT_HEAD(&hp->head, p, links);
+ }
+ *txnlistp = p;
+ return (ret);
+ }
+
+ return (DB_NOTFOUND);
+}
+
+/*
+ * __db_txnlist_gen --
+ * Change the current generation number.
+ *
+ * PUBLIC: void __db_txnlist_gen __P((void *, int));
+ */
+void
+__db_txnlist_gen(listp, incr)
+ void *listp;
+ int incr;
+{
+ DB_TXNHEAD *hp;
+
+ /*
+ * During recovery generation numbers keep track of how many "restart"
+ * checkpoints we've seen. Restart checkpoints occur whenever we take
+ * a checkpoint and there are no outstanding transactions. When that
+ * happens, we can reset transaction IDs back to 1. It always happens
+ * at recovery and it prevents us from exhausting the transaction IDs
+ * name space.
+ */
+ hp = (DB_TXNHEAD *)listp;
+ hp->generation += incr;
+}
+
+#define TXN_BUBBLE(AP, MAX) { \
+ int __j; \
+ DB_LSN __tmp; \
+ \
+ for (__j = 0; __j < MAX - 1; __j++) \
+ if (log_compare(&AP[__j], &AP[__j + 1]) < 0) { \
+ __tmp = AP[__j]; \
+ AP[__j] = AP[__j + 1]; \
+ AP[__j + 1] = __tmp; \
+ } \
+}
+
+/*
+ * __db_txnlist_lsnadd --
+ * Add to or re-sort the transaction list lsn entry.
+ * Note that since this is used during an abort, the __txn_undo
+ * code calls into the "recovery" subsystem explicitly, and there
+ * is only a single TXNLIST_LSN entry on the list.
+ *
+ * PUBLIC: int __db_txnlist_lsnadd __P((DB_ENV *, void *, DB_LSN *, u_int32_t));
+ */
+int
+__db_txnlist_lsnadd(dbenv, listp, lsnp, flags)
+ DB_ENV *dbenv;
+ void *listp;
+ DB_LSN *lsnp;
+ u_int32_t flags;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *elp;
+ int i, ret;
+
+ hp = (DB_TXNHEAD *)listp;
+
+ for (elp = LIST_FIRST(&hp->head);
+ elp != NULL; elp = LIST_NEXT(elp, links))
+ if (elp->type == TXNLIST_LSN)
+ break;
+
+ if (elp == NULL)
+ return (EINVAL);
+
+ if (LF_ISSET(TXNLIST_NEW)) {
+ if (elp->u.l.ntxns >= elp->u.l.maxn) {
+ if ((ret = __os_realloc(dbenv,
+ 2 * elp->u.l.maxn * sizeof(DB_LSN),
+ NULL, &elp->u.l.lsn_array)) != 0)
+ return (ret);
+ elp->u.l.maxn *= 2;
+ }
+ elp->u.l.lsn_array[elp->u.l.ntxns++] = *lsnp;
+ } else
+ /* Simply replace the 0th element. */
+ elp->u.l.lsn_array[0] = *lsnp;
+
+ /*
+ * If we just added a new entry and there may be NULL
+ * entries, so we have to do a complete bubble sort,
+ * not just trickle a changed entry around.
+ */
+ for (i = 0; i < (!LF_ISSET(TXNLIST_NEW) ? 1 : elp->u.l.ntxns); i++)
+ TXN_BUBBLE(elp->u.l.lsn_array, elp->u.l.ntxns);
+
+ *lsnp = elp->u.l.lsn_array[0];
+
+ return (0);
+}
+
+/*
+ * __db_txnlist_lsnhead --
+ * Return a pointer to the beginning of the lsn_array.
+ *
+ * PUBLIC: int __db_txnlist_lsnhead __P((void *, DB_LSN **));
+ */
+int
+__db_txnlist_lsnhead(listp, lsnpp)
+ void *listp;
+ DB_LSN **lsnpp;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *elp;
+
+ hp = (DB_TXNHEAD *)listp;
+
+ for (elp = LIST_FIRST(&hp->head);
+ elp != NULL; elp = LIST_NEXT(elp, links))
+ if (elp->type == TXNLIST_LSN)
+ break;
+
+ if (elp == NULL)
+ return (EINVAL);
+
+ *lsnpp = &elp->u.l.lsn_array[0];
+
+ return (0);
+}
+
+/*
+ * __db_txnlist_lsninit --
+ * Initialize a transaction list with an lsn array entry.
+ *
+ * PUBLIC: int __db_txnlist_lsninit __P((DB_ENV *, DB_TXNHEAD *, DB_LSN *));
+ */
+int
+__db_txnlist_lsninit(dbenv, hp, lsnp)
+ DB_ENV *dbenv;
+ DB_TXNHEAD *hp;
+ DB_LSN *lsnp;
+{
+ DB_TXNLIST *elp;
+ int ret;
+
+ elp = NULL;
+
+ if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), NULL, &elp)) != 0)
+ goto err;
+ LIST_INSERT_HEAD(&hp->head, elp, links);
+
+ if ((ret = __os_malloc(dbenv,
+ 12 * sizeof(DB_LSN), NULL, &elp->u.l.lsn_array)) != 0)
+ goto err;
+ elp->type = TXNLIST_LSN;
+ elp->u.l.maxn = 12;
+ elp->u.l.ntxns = 1;
+ elp->u.l.lsn_array[0] = *lsnp;
+
+ return (0);
+
+err: __db_txnlist_end(dbenv, hp);
+ return (ret);
+}
+
+/*
+ * __db_add_limbo -- add pages to the limbo list.
+ * Get the file information and call pgnoadd
+ * for each page.
+ *
+ * PUBLIC: int __db_add_limbo __P((DB_ENV *,
+ * PUBLIC: void *, int32_t, db_pgno_t, int32_t));
+ */
+int
+__db_add_limbo(dbenv, info, fileid, pgno, count)
+ DB_ENV *dbenv;
+ void *info;
+ int32_t fileid;
+ db_pgno_t pgno;
+ int32_t count;
+{
+ DB_LOG *dblp;
+ FNAME *fnp;
+ int ret;
+
+ dblp = dbenv->lg_handle;
+ if ((ret = __log_lid_to_fname(dblp, fileid, &fnp)) != 0)
+ return (ret);
+
+ do {
+ if ((ret =
+ __db_txnlist_pgnoadd(dbenv, info, fileid, fnp->ufid,
+ R_ADDR(&dblp->reginfo, fnp->name_off), pgno)) != 0)
+ return (ret);
+ pgno++;
+ } while (--count != 0);
+
+ return (0);
+}
+
+/*
+ * __db_do_the_limbo -- move pages from limbo to free.
+ *
+ * If we are in recovery we add things to the free list without
+ * logging becasue we want to incrementaly apply logs that
+ * may be generated on another copy of this environment.
+ * Otherwise we just call __db_free to put the pages on
+ * the free list and log the activity.
+ *
+ * PUBLIC: int __db_do_the_limbo __P((DB_ENV *, DB_TXNHEAD *));
+ */
+int
+__db_do_the_limbo(dbenv, hp)
+ DB_ENV *dbenv;
+ DB_TXNHEAD *hp;
+{
+ DB *dbp;
+ DBC *dbc;
+ DBMETA *meta;
+ DB_TXN *txn;
+ DB_TXNLIST *elp;
+ PAGE *pagep;
+ db_pgno_t last_pgno, pgno;
+ int i, in_recover, put_page, ret, t_ret;
+
+ dbp = NULL;
+ dbc = NULL;
+ txn = NULL;
+ ret = 0;
+
+ /* Are we in recovery? */
+ in_recover = F_ISSET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
+
+ for (elp = LIST_FIRST(&hp->head);
+ elp != NULL; elp = LIST_NEXT(elp, links)) {
+ if (elp->type != TXNLIST_PGNO)
+ continue;
+
+ if (in_recover) {
+ if ((ret = db_create(&dbp, dbenv, 0)) != 0)
+ goto err;
+
+ /*
+ * It is ok if the file is nolonger there.
+ */
+ dbp->type = DB_UNKNOWN;
+ ret = __db_dbopen(dbp,
+ elp->u.p.fname, 0, __db_omode("rw----"), 0);
+ } else {
+ /*
+ * If we are in transaction undo, then we know
+ * the fileid is still correct.
+ */
+ if ((ret =
+ __db_fileid_to_db(dbenv, &dbp,
+ elp->u.p.fileid, 0)) != 0 && ret != DB_DELETED)
+ goto err;
+ /* File is being destroyed. */
+ if (F_ISSET(dbp, DB_AM_DISCARD))
+ ret = DB_DELETED;
+ }
+ /*
+ * Verify that we are opening the same file that we were
+ * referring to when we wrote this log record.
+ */
+ if (ret == 0 &&
+ memcmp(elp->u.p.uid, dbp->fileid, DB_FILE_ID_LEN) == 0) {
+ last_pgno = PGNO_INVALID;
+ if (in_recover) {
+ pgno = PGNO_BASE_MD;
+ if ((ret = memp_fget(dbp->mpf,
+ &pgno, 0, (PAGE **)&meta)) != 0)
+ goto err;
+ last_pgno = meta->free;
+ /*
+ * Check to see if the head of the free
+ * list is any of the pages we are about
+ * to link in. We could have crashed
+ * after linking them in and before writing
+ * a checkpoint.
+ * It may not be the last one since
+ * any page may get reallocated before here.
+ */
+ if (last_pgno != PGNO_INVALID)
+ for (i = 0; i < elp->u.p.nentries; i++)
+ if (last_pgno
+ == elp->u.p.pgno_array[i])
+ goto done_it;
+ }
+
+ for (i = 0; i < elp->u.p.nentries; i++) {
+ pgno = elp->u.p.pgno_array[i];
+ if ((ret = memp_fget(dbp->mpf,
+ &pgno, DB_MPOOL_CREATE, &pagep)) != 0)
+ goto err;
+
+ put_page = 1;
+ if (IS_ZERO_LSN(LSN(pagep))) {
+ P_INIT(pagep, dbp->pgsize,
+ pgno, PGNO_INVALID,
+ last_pgno, 0, P_INVALID);
+
+ if (in_recover) {
+ LSN(pagep) = LSN(meta);
+ last_pgno = pgno;
+ } else {
+ /*
+ * Starting the transaction
+ * is postponed until we know
+ * we have something to do.
+ */
+ if (txn == NULL &&
+ (ret = txn_begin(dbenv,
+ NULL, &txn, 0)) != 0)
+ goto err;
+
+ if (dbc == NULL &&
+ (ret = dbp->cursor(dbp,
+ txn, &dbc, 0)) != 0)
+ goto err;
+ /* Turn off locking. */
+ F_SET(dbc, DBC_COMPENSATE);
+
+ /* __db_free puts the page. */
+ if ((ret =
+ __db_free(dbc, pagep)) != 0)
+ goto err;
+ put_page = 0;
+ }
+ }
+
+ if (put_page == 1 &&
+ (ret = memp_fput(dbp->mpf,
+ pagep, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+ }
+ if (in_recover) {
+ if (last_pgno == meta->free) {
+done_it:
+ if ((ret =
+ memp_fput(dbp->mpf, meta, 0)) != 0)
+ goto err;
+ } else {
+ /*
+ * Flush the new free list then
+ * update the metapage. This is
+ * unlogged so we cannot have the
+ * metapage pointing at pages that
+ * are not on disk.
+ */
+ dbp->sync(dbp, 0);
+ meta->free = last_pgno;
+ if ((ret = memp_fput(dbp->mpf,
+ meta, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+ }
+ }
+ if (dbc != NULL && (ret = dbc->c_close(dbc)) != 0)
+ goto err;
+ dbc = NULL;
+ }
+ if (in_recover && (t_ret = dbp->close(dbp, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ dbp = NULL;
+ __os_free(elp->u.p.fname, 0);
+ __os_free(elp->u.p.pgno_array, 0);
+ if (ret == ENOENT)
+ ret = 0;
+ else if (ret != 0)
+ goto err;
+ }
+
+ if (txn != NULL) {
+ ret = txn_commit(txn, 0);
+ txn = NULL;
+ }
+err:
+ if (dbc != NULL)
+ (void)dbc->c_close(dbc);
+ if (in_recover && dbp != NULL)
+ (void)dbp->close(dbp, 0);
+ if (txn != NULL)
+ (void)txn_abort(txn);
+ return (ret);
+
+}
+
+#define DB_TXNLIST_MAX_PGNO 8 /* A nice even number. */
+
+/*
+ * __db_txnlist_pgnoadd --
+ * Find the txnlist entry for a file and add this pgno,
+ * or add the list entry for the file and then add the pgno.
+ *
+ * PUBLIC: int __db_txnlist_pgnoadd __P((DB_ENV *, DB_TXNHEAD *,
+ * PUBLIC: int32_t, u_int8_t [DB_FILE_ID_LEN], char *, db_pgno_t));
+ */
+int
+__db_txnlist_pgnoadd(dbenv, hp, fileid, uid, fname, pgno)
+ DB_ENV *dbenv;
+ DB_TXNHEAD *hp;
+ int32_t fileid;
+ u_int8_t uid[DB_FILE_ID_LEN];
+ char *fname;
+ db_pgno_t pgno;
+{
+ DB_TXNLIST *elp;
+ int len, ret;
+
+ elp = NULL;
+
+ if (__db_txnlist_find_internal(hp, TXNLIST_PGNO, 0, uid, &elp, 0) != 0) {
+ if ((ret =
+ __os_malloc(dbenv, sizeof(DB_TXNLIST), NULL, &elp)) != 0)
+ goto err;
+ LIST_INSERT_HEAD(&hp->head, elp, links);
+ elp->u.p.fileid = fileid;
+ memcpy(elp->u.p.uid, uid, DB_FILE_ID_LEN);
+
+ len = strlen(fname) + 1;
+ if ((ret = __os_malloc(dbenv, len, NULL, &elp->u.p.fname)) != 0)
+ goto err;
+ memcpy(elp->u.p.fname, fname, len);
+
+ elp->u.p.maxentry = 0;
+ elp->type = TXNLIST_PGNO;
+ if ((ret = __os_malloc(dbenv,
+ 8 * sizeof(db_pgno_t), NULL, &elp->u.p.pgno_array)) != 0)
+ goto err;
+ elp->u.p.maxentry = DB_TXNLIST_MAX_PGNO;
+ elp->u.p.nentries = 0;
+ } else if (elp->u.p.nentries == elp->u.p.maxentry) {
+ elp->u.p.maxentry <<= 1;
+ if ((ret = __os_realloc(dbenv, elp->u.p.maxentry *
+ sizeof(db_pgno_t), NULL, &elp->u.p.pgno_array)) != 0)
+ goto err;
+ }
+
+ elp->u.p.pgno_array[elp->u.p.nentries++] = pgno;
+
+ return (0);
+
+err: __db_txnlist_end(dbenv, hp);
+ return (ret);
+}
+
+#ifdef DEBUG
+/*
+ * __db_txnlist_print --
+ * Print out the transaction list.
+ *
+ * PUBLIC: void __db_txnlist_print __P((void *));
+ */
+void
+__db_txnlist_print(listp)
+ void *listp;
+{
+ DB_TXNHEAD *hp;
+ DB_TXNLIST *p;
+
+ hp = (DB_TXNHEAD *)listp;
+
+ printf("Maxid: %lu Generation: %lu\n",
+ (u_long)hp->maxid, (u_long)hp->generation);
+ for (p = LIST_FIRST(&hp->head); p != NULL; p = LIST_NEXT(p, links)) {
+ switch (p->type) {
+ case TXNLIST_TXNID:
+ printf("TXNID: %lu(%lu)\n",
+ (u_long)p->u.t.txnid, (u_long)p->u.t.generation);
+ break;
+ case TXNLIST_DELETE:
+ printf("FILE: %s id=%d ops=%d %s %s\n",
+ p->u.d.fname, p->u.d.fileid, p->u.d.count,
+ F_ISSET(&p->u.d, TXNLIST_FLAG_DELETED) ?
+ "(deleted)" : "(missing)",
+ F_ISSET(&p->u.d, TXNLIST_FLAG_CLOSED) ?
+ "(closed)" : "(open)");
+
+ break;
+ default:
+ printf("Unrecognized type: %d\n", p->type);
+ break;
+ }
+ }
+}
+#endif