summaryrefslogtreecommitdiff
path: root/bdb/log
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/log')
-rw-r--r--bdb/log/log.c653
-rw-r--r--bdb/log/log.src46
-rw-r--r--bdb/log/log_archive.c447
-rw-r--r--bdb/log/log_auto.c326
-rw-r--r--bdb/log/log_compare.c34
-rw-r--r--bdb/log/log_findckp.c135
-rw-r--r--bdb/log/log_get.c465
-rw-r--r--bdb/log/log_method.c121
-rw-r--r--bdb/log/log_put.c701
-rw-r--r--bdb/log/log_rec.c621
-rw-r--r--bdb/log/log_register.c433
11 files changed, 3982 insertions, 0 deletions
diff --git a/bdb/log/log.c b/bdb/log/log.c
new file mode 100644
index 00000000000..69af1624824
--- /dev/null
+++ b/bdb/log/log.c
@@ -0,0 +1,653 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log.c,v 11.42 2001/01/15 16:42:37 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "txn.h"
+#include "txn_auto.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static int __log_init __P((DB_ENV *, DB_LOG *));
+static int __log_recover __P((DB_LOG *));
+
+/*
+ * __log_open --
+ * Internal version of log_open: only called from DB_ENV->open.
+ *
+ * Allocates the per-process DB_LOG handle and a read buffer of
+ * dbenv->lg_bsize bytes, attaches to the log region (running __log_init
+ * when this call created the region), allocates a handle mutex if the
+ * environment is threaded, and finally publishes the handle in
+ * dbenv->lg_handle. Returns 0 on success. On failure the region is
+ * detached, the local allocations are freed and the error is returned.
+ *
+ * PUBLIC: int __log_open __P((DB_ENV *));
+ */
+int
+__log_open(dbenv)
+ DB_ENV *dbenv;
+{
+ DB_LOG *dblp;
+ LOG *lp;
+ int ret;
+ u_int8_t *readbufp;
+
+ /* NULL lets the error path tell whether the buffer was allocated. */
+ readbufp = NULL;
+
+ /* Create/initialize the DB_LOG structure. */
+ if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOG), &dblp)) != 0)
+ return (ret);
+ if ((ret = __os_calloc(dbenv, 1, dbenv->lg_bsize, &readbufp)) != 0)
+ goto err;
+ ZERO_LSN(dblp->c_lsn);
+ dblp->dbenv = dbenv;
+
+ /* Join/create the log region. */
+ dblp->reginfo.type = REGION_TYPE_LOG;
+ dblp->reginfo.id = INVALID_REGION_ID;
+ dblp->reginfo.mode = dbenv->db_mode;
+ dblp->reginfo.flags = REGION_JOIN_OK;
+ if (F_ISSET(dbenv, DB_ENV_CREATE))
+ F_SET(&dblp->reginfo, REGION_CREATE_OK);
+ /* The requested size covers the base region plus one log buffer. */
+ if ((ret = __db_r_attach(
+ dbenv, &dblp->reginfo, LG_BASE_REGION_SIZE + dbenv->lg_bsize)) != 0)
+ goto err;
+
+ /*
+ * The handle now refers to the read buffer, but the error path below
+ * still frees it through the local readbufp.
+ */
+ dblp->readbufp = readbufp;
+
+ /* If we created the region, initialize it. */
+ if (F_ISSET(&dblp->reginfo, REGION_CREATE) &&
+ (ret = __log_init(dbenv, dblp)) != 0)
+ goto err;
+
+ /* Set the local addresses. */
+ lp = dblp->reginfo.primary =
+ R_ADDR(&dblp->reginfo, dblp->reginfo.rp->primary);
+ dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off);
+
+ /*
+ * If the region is threaded, then we have to lock both the handles
+ * and the region, and we need to allocate a mutex for that purpose.
+ */
+ if (F_ISSET(dbenv, DB_ENV_THREAD)) {
+ if ((ret = __db_mutex_alloc(
+ dbenv, &dblp->reginfo, &dblp->mutexp)) != 0)
+ goto err;
+ if ((ret = __db_mutex_init(
+ dbenv, dblp->mutexp, 0, MUTEX_THREAD)) != 0)
+ goto err;
+ }
+
+ /*
+ * NOTE(review): presumably __db_r_attach returns with the region
+ * locked, which is why it is unlocked here and on the error path --
+ * confirm against __db_r_attach.
+ */
+ R_UNLOCK(dbenv, &dblp->reginfo);
+
+ /* Reset the read-buffer bookkeeping, then publish the handle. */
+ dblp->r_file = 0;
+ dblp->r_off = 0;
+ dblp->r_size = 0;
+ dbenv->lg_handle = dblp;
+ return (0);
+
+ /*
+ * Error path. A non-NULL reginfo.addr means the attach succeeded, so
+ * the region must be unlocked and detached. If this call created the
+ * region, the failure is escalated through __db_panic and its return
+ * value replaces ret.
+ */
+err: if (dblp->reginfo.addr != NULL) {
+ if (F_ISSET(&dblp->reginfo, REGION_CREATE))
+ ret = __db_panic(dbenv, ret);
+ R_UNLOCK(dbenv, &dblp->reginfo);
+ (void)__db_r_detach(dbenv, &dblp->reginfo, 0);
+ }
+
+ if (readbufp != NULL)
+ __os_free(readbufp, dbenv->lg_bsize);
+ /*
+ * NOTE(review): the mutex is released after the region has already
+ * been detached above; confirm __db_mutex_free does not need the
+ * region to still be mapped.
+ */
+ if (dblp->mutexp != NULL)
+ __db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp);
+ __os_free(dblp, sizeof(*dblp));
+ return (ret);
+}
+
+/*
+ * __log_init --
+ *	Set up a newly created log region: allocate the shared LOG
+ *	structure and the log buffer from the region, fill in the
+ *	persistent header values from the environment, then hand off to
+ *	__log_recover so existing log files are taken into account.
+ *
+ *	Returns 0 on success, otherwise the error from __db_shalloc or
+ *	from __log_recover.
+ */
+static int
+__log_init(dbenv, dblp)
+	DB_ENV *dbenv;
+	DB_LOG *dblp;
+{
+	LOG *lp;
+	void *bufp;
+	int ret;
+
+	/* Allocate the LOG structure and record its region offset. */
+	ret = __db_shalloc(dblp->reginfo.addr,
+	    sizeof(*lp), 0, &dblp->reginfo.primary);
+	if (ret != 0)
+		goto mem_err;
+	dblp->reginfo.rp->primary =
+	    R_OFFSET(&dblp->reginfo, dblp->reginfo.primary);
+	lp = dblp->reginfo.primary;
+	memset(lp, 0, sizeof(*lp));
+
+	/* Persistent header information comes from the environment. */
+	lp->persist.lg_max = dbenv->lg_max;
+	lp->persist.magic = DB_LOGMAGIC;
+	lp->persist.version = DB_LOGVERSION;
+	lp->persist.mode = dbenv->db_mode;
+	SH_TAILQ_INIT(&lp->fq);
+
+	/* A brand-new log starts at the beginning of file 1. */
+	lp->lsn.file = 1;
+	lp->lsn.offset = 0;
+
+	/* Allocate the log buffer and record its size and region offset. */
+	ret = __db_shalloc(dblp->reginfo.addr, dbenv->lg_bsize, 0, &bufp);
+	if (ret != 0)
+		goto mem_err;
+	lp->buffer_size = dbenv->lg_bsize;
+	lp->buffer_off = R_OFFSET(&dblp->reginfo, bufp);
+
+	/* Try and recover any previous log files before releasing the lock. */
+	return (__log_recover(dblp));
+
+mem_err:
+	__db_err(dbenv, "Unable to allocate memory for the log buffer");
+	return (ret);
+}
+
+/*
+ * __log_recover --
+ * Recover a log.
+ *
+ * Locates the last log file with __log_find and positions the region's
+ * LSNs (lsn, s_lsn, chkpt_lsn) and buffer offsets accordingly. If no
+ * log file exists the region is left as initialized and 0 is returned.
+ * Returns 0 on success or the error from __log_find/__log_get.
+ */
+static int
+__log_recover(dblp)
+ DB_LOG *dblp;
+{
+ DBT dbt;
+ DB_LSN lsn;
+ LOG *lp;
+ int cnt, found_checkpoint, ret;
+ u_int32_t chk;
+ logfile_validity status;
+
+ lp = dblp->reginfo.primary;
+
+ /*
+ * Find a log file. If none exist, we simply return, leaving
+ * everything initialized to a new log.
+ */
+ if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0)
+ return (ret);
+ if (cnt == 0)
+ return (0);
+
+ /*
+ * If the last file is an old version, readable or no, start a new
+ * file. Don't bother finding checkpoints; if we didn't take a
+ * checkpoint right before upgrading, the user screwed up anyway.
+ */
+ if (status == DB_LV_OLD_READABLE || status == DB_LV_OLD_UNREADABLE) {
+ lp->lsn.file = lp->s_lsn.file = cnt + 1;
+ lp->lsn.offset = lp->s_lsn.offset = 0;
+ goto skipsearch;
+ }
+ DB_ASSERT(status == DB_LV_NORMAL);
+
+ /*
+ * We have the last useful log file and we've loaded any persistent
+ * information. Set the end point of the log past the end of the last
+ * file. Read the last file, looking for the last checkpoint and
+ * the log's end.
+ */
+ lp->lsn.file = cnt + 1;
+ lp->lsn.offset = 0;
+ lsn.file = cnt;
+ lsn.offset = 0;
+
+ /* Set the cursor. Shouldn't fail; leave error messages on. */
+ memset(&dbt, 0, sizeof(dbt));
+ if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
+ return (ret);
+
+ /*
+ * Read to the end of the file, saving checkpoints. This will fail
+ * at some point, so turn off error messages. Records too short to
+ * hold a record type are skipped. Each checkpoint record overwrites
+ * chkpt_lsn, so the last one in the file wins.
+ */
+ found_checkpoint = 0;
+ while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 1) == 0) {
+ if (dbt.size < sizeof(u_int32_t))
+ continue;
+ memcpy(&chk, dbt.data, sizeof(u_int32_t));
+ if (chk == DB_txn_ckp) {
+ lp->chkpt_lsn = lsn;
+ found_checkpoint = 1;
+ }
+ }
+
+ /*
+ * We now know where the end of the log is. Set the first LSN that
+ * we want to return to an application and the LSN of the last known
+ * record on disk. Both are the LSN of the last record read advanced
+ * by dblp->c_len (presumably the length of that record -- confirm
+ * against __log_get).
+ */
+ lp->lsn = lsn;
+ lp->s_lsn = lsn;
+ lp->lsn.offset += dblp->c_len;
+ lp->s_lsn.offset += dblp->c_len;
+
+ /* Set up the current buffer information, too. */
+ lp->len = dblp->c_len;
+ lp->b_off = 0;
+ lp->w_off = lp->lsn.offset;
+
+ /*
+ * It's possible that we didn't find a checkpoint because there wasn't
+ * one in the last log file. Start searching.
+ */
+ if (!found_checkpoint && cnt > 1) {
+ lsn.file = cnt;
+ lsn.offset = 0;
+
+ /* Set the cursor. Shouldn't fail, leave error messages on. */
+ if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
+ return (ret);
+
+ /*
+ * Walk backward (DB_PREV) from the start of the last file and
+ * stop at the first checkpoint record encountered. Again,
+ * this can fail if there are no checkpoints in any log file,
+ * so turn error messages off.
+ */
+ while (__log_get(dblp, &lsn, &dbt, DB_PREV, 1) == 0) {
+ if (dbt.size < sizeof(u_int32_t))
+ continue;
+ memcpy(&chk, dbt.data, sizeof(u_int32_t));
+ if (chk == DB_txn_ckp) {
+ lp->chkpt_lsn = lsn;
+ found_checkpoint = 1;
+ break;
+ }
+ }
+ }
+
+ /*
+ * If we never find a checkpoint, that's okay, just 0 it out. The
+ * old-version path above jumps straight to the label inside this
+ * if, so it always zeroes the checkpoint LSN.
+ */
+ if (!found_checkpoint)
+skipsearch: ZERO_LSN(lp->chkpt_lsn);
+
+ /*
+ * Reset the cursor lsn to the beginning of the log, so that an
+ * initial call to DB_NEXT does the right thing.
+ */
+ ZERO_LSN(dblp->c_lsn);
+
+ if (FLD_ISSET(dblp->dbenv->verbose, DB_VERB_RECOVERY))
+ __db_err(dblp->dbenv,
+ "Finding last valid log LSN: file: %lu offset %lu",
+ (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
+
+ return (0);
+}
+
+/*
+ * __log_find --
+ * Try to find a log file. If find_first is set, valp will contain
+ * the number of the first readable log file, else it will contain the number
+ * of the last log file (which may be too old to read).
+ *
+ * *valp is set to 0 up front and stays 0 if no log file is found or an
+ * error occurs. *statusp receives the validity of the file reported in
+ * *valp; it is only written on the paths that reach the final cleanup,
+ * not on the two early error returns. Returns 0 or an error code.
+ *
+ * PUBLIC: int __log_find __P((DB_LOG *, int, int *, logfile_validity *));
+ */
+int
+__log_find(dblp, find_first, valp, statusp)
+ DB_LOG *dblp;
+ int find_first, *valp;
+ logfile_validity *statusp;
+{
+ logfile_validity clv_status, status;
+ u_int32_t clv, logval;
+ int cnt, fcnt, ret;
+ const char *dir;
+ char **names, *p, *q, savech;
+
+ clv_status = status = DB_LV_NORMAL;
+
+ /* Return a value of 0 as the log file number on failure. */
+ *valp = 0;
+
+ /*
+ * Find the directory name: build the path of log file 1, then cut it
+ * at the position __db_rpath returns. If __db_rpath returns NULL,
+ * PATH_DOT is used as the directory.
+ */
+ if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0)
+ return (ret);
+ if ((q = __db_rpath(p)) == NULL) {
+ COMPQUIET(savech, 0);
+ dir = PATH_DOT;
+ } else {
+ savech = *q;
+ *q = '\0';
+ dir = p;
+ }
+
+ /* Get the list of file names. */
+ ret = __os_dirlist(dblp->dbenv, dir, &names, &fcnt);
+
+ /*
+ * !!!
+ * We overwrote a byte in the string with a nul. Restore the string
+ * so that the diagnostic checks in the memory allocation code work
+ * and any error messages display the right file name.
+ */
+ if (q != NULL)
+ *q = savech;
+
+ if (ret != 0) {
+ __db_err(dblp->dbenv, "%s: %s", dir, db_strerror(ret));
+ __os_freestr(p);
+ return (ret);
+ }
+
+ /*
+ * Search for a valid log file name, walking the directory list from
+ * its last entry to its first. logval is the best candidate so far
+ * (0 means none yet) and clv is the number parsed from the current
+ * entry.
+ */
+ for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) {
+ if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0)
+ continue;
+
+ /*
+ * Use atol, not atoi; if an "int" is 16-bits, the largest
+ * log file name won't fit.
+ */
+ clv = atol(names[cnt] + (sizeof(LFPREFIX) - 1));
+ /*
+ * Skip entries that cannot improve on the current candidate:
+ * larger numbers when looking for the first file, smaller
+ * numbers when looking for the last.
+ */
+ if (find_first) {
+ if (logval != 0 && clv > logval)
+ continue;
+ } else
+ if (logval != 0 && clv < logval)
+ continue;
+
+ /*
+ * Take note of whether the log file clv is
+ * an old version or incompletely initialized.
+ */
+ if ((ret = __log_valid(dblp, clv, 1, &status)) != 0)
+ goto err;
+ switch (status) {
+ case DB_LV_INCOMPLETE:
+ /*
+ * It's acceptable for the last log file to
+ * have been incompletely initialized--it's possible
+ * to create a log file but not write anything to it,
+ * and recovery needs to gracefully handle this.
+ *
+ * Just ignore it; we don't want to return this
+ * as a valid log file.
+ */
+ break;
+ case DB_LV_NORMAL:
+ case DB_LV_OLD_READABLE:
+ logval = clv;
+ clv_status = status;
+ break;
+ case DB_LV_OLD_UNREADABLE:
+ /*
+ * Continue; we want the oldest valid log,
+ * and clv is too old to be useful. We don't
+ * want it to supplant logval if we're looking for
+ * the oldest valid log, but we do want to return
+ * it if it's the last log file--we want the very
+ * last file number, so that our caller can
+ * start a new file after it.
+ *
+ * The code here assumes that there will never
+ * be a too-old log that's preceded by a log
+ * of the current version, but in order to
+ * attain that state of affairs the user
+ * would have had to really seriously screw
+ * up; I think we can safely assume this won't
+ * happen.
+ */
+ if (!find_first) {
+ logval = clv;
+ clv_status = status;
+ }
+ break;
+ }
+ }
+
+ *valp = logval;
+
+ /* Shared cleanup for the success path and for __log_valid failures. */
+err: __os_dirfree(names, fcnt);
+ __os_freestr(p);
+ *statusp = clv_status;
+
+ return (ret);
+}
+
+/*
+ * __log_valid --
+ *	Validate a log file. Returns an error code in the event of
+ *	a fatal flaw in the specified log file; returns success with
+ *	a code indicating the currentness and completeness of the specified
+ *	log file if it is not unexpectedly flawed (that is, if it's perfectly
+ *	normal, if it's zero-length, or if it's an old version).
+ *
+ *	When set_persist is non-zero and the file is readable, the region's
+ *	persistent lg_max and mode are overwritten from the file's header.
+ *	*statusp is written on every path except a failed open.
+ *
+ * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int, logfile_validity *));
+ */
+int
+__log_valid(dblp, number, set_persist, statusp)
+	DB_LOG *dblp;
+	u_int32_t number;
+	int set_persist;
+	logfile_validity *statusp;
+{
+	DB_FH fh;
+	LOG *lp;
+	LOGP hdr;
+	char *fname;
+	int ret;
+	logfile_validity result;
+	size_t nr;
+
+	result = DB_LV_NORMAL;
+
+	/* Build the file name and open the file read-only. */
+	ret = __log_name(dblp,
+	    number, &fname, &fh, DB_OSO_RDONLY | DB_OSO_SEQ);
+	if (ret != 0) {
+		__os_freestr(fname);
+		return (ret);
+	}
+
+	/* Position at offset sizeof(HDR) and read the persistent header. */
+	ret = __os_seek(dblp->dbenv,
+	    &fh, 0, 0, sizeof(HDR), 0, DB_OS_SEEK_SET);
+	if (ret == 0)
+		ret = __os_read(dblp->dbenv, &fh, &hdr, sizeof(LOGP), &nr);
+	if (ret != 0 || nr != sizeof(LOGP)) {
+		if (ret != 0)
+			/*
+			 * The error was a fatal read error, not just an
+			 * incompletely initialized log file.
+			 */
+			__db_err(dblp->dbenv, "Ignoring log file: %s: %s",
+			    fname, db_strerror(ret));
+		else
+			/* A short read means the file was never filled in. */
+			result = DB_LV_INCOMPLETE;
+
+		(void)__os_closehandle(&fh);
+		goto done;
+	}
+	(void)__os_closehandle(&fh);
+
+	/* A wrong magic number is a fatal flaw. */
+	if (hdr.magic != DB_LOGMAGIC) {
+		__db_err(dblp->dbenv,
+		    "Ignoring log file: %s: magic number %lx, not %lx",
+		    fname, (u_long)hdr.magic, (u_long)DB_LOGMAGIC);
+		ret = EINVAL;
+		goto done;
+	}
+
+	/* A log written by a newer release is a fatal flaw as well. */
+	if (hdr.version > DB_LOGVERSION) {
+		__db_err(dblp->dbenv,
+		    "Ignoring log file: %s: unsupported log version %lu",
+		    fname, (u_long)hdr.version);
+		ret = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Too old to read: report it, but don't set persistent info based
+	 * on an unreadable file.
+	 */
+	if (hdr.version < DB_LOGOLDVER) {
+		result = DB_LV_OLD_UNREADABLE;
+		goto done;
+	}
+
+	/* Old but readable; the status stays NORMAL for the current version. */
+	if (hdr.version < DB_LOGVERSION)
+		result = DB_LV_OLD_READABLE;
+
+	/*
+	 * If the log is thus far readable and we're doing system
+	 * initialization, set the region's persistent information
+	 * based on the headers.
+	 */
+	if (set_persist) {
+		lp = dblp->reginfo.primary;
+		lp->persist.lg_max = hdr.lg_max;
+		lp->persist.mode = hdr.mode;
+	}
+
+done:	__os_freestr(fname);
+	*statusp = result;
+	return (ret);
+}
+
+/*
+ * __log_close --
+ *	Internal version of log_close: only called from dbenv_refresh.
+ *
+ *	Closes any files opened through the log, frees the handle mutex,
+ *	detaches from the region, closes the handle's file descriptors and
+ *	frees its buffers and the handle itself. Returns the first error
+ *	seen from the detach or the handle closes, else 0.
+ *
+ * PUBLIC: int __log_close __P((DB_ENV *));
+ */
+int
+__log_close(dbenv)
+	DB_ENV *dbenv;
+{
+	DB_LOG *logp;
+	int ret, t_ret;
+
+	logp = dbenv->lg_handle;
+
+	/* We may have opened files as part of XA; if so, close them. */
+	F_SET(logp, DBLOG_RECOVER);
+	__log_close_files(dbenv);
+
+	/* Discard the per-thread lock. */
+	if (logp->mutexp != NULL)
+		__db_mutex_free(dbenv, &logp->reginfo, logp->mutexp);
+
+	/* Detach from the region; this is the first possible error. */
+	ret = __db_r_detach(dbenv, &logp->reginfo, 0);
+
+	/* Close the log file handle, keeping the first error seen. */
+	if (F_ISSET(&logp->lfh, DB_FH_VALID)) {
+		t_ret = __os_closehandle(&logp->lfh);
+		if (t_ret != 0 && ret == 0)
+			ret = t_ret;
+	}
+
+	/* Release the cursor's record buffer and its file handle. */
+	if (logp->c_dbt.data != NULL)
+		__os_free(logp->c_dbt.data, logp->c_dbt.ulen);
+	if (F_ISSET(&logp->c_fh, DB_FH_VALID)) {
+		t_ret = __os_closehandle(&logp->c_fh);
+		if (t_ret != 0 && ret == 0)
+			ret = t_ret;
+	}
+
+	/* Release the file-id table and the read buffer. */
+	if (logp->dbentry != NULL)
+		__os_free(logp->dbentry,
+		    (logp->dbentry_cnt * sizeof(DB_ENTRY)));
+	if (logp->readbufp != NULL)
+		__os_free(logp->readbufp, dbenv->lg_bsize);
+
+	/* Finally the handle itself; the environment no longer has a log. */
+	__os_free(logp, sizeof(*logp));
+	dbenv->lg_handle = NULL;
+
+	return (ret);
+}
+
+/*
+ * log_stat --
+ *	Return LOG statistics.
+ *
+ *	On success *statp points at a DB_LOG_STAT allocated with db_malloc
+ *	(passed through to __os_malloc) and 0 is returned. On allocation
+ *	failure *statp is NULL and the error is returned.
+ */
+int
+log_stat(dbenv, statp, db_malloc)
+	DB_ENV *dbenv;
+	DB_LOG_STAT **statp;
+	void *(*db_malloc) __P((size_t));
+{
+	DB_LOG *logp;
+	DB_LOG_STAT *sp;
+	LOG *lp;
+	int ret;
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_stat(dbenv, statp, db_malloc));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	*statp = NULL;
+
+	logp = dbenv->lg_handle;
+	lp = logp->reginfo.primary;
+
+	ret = __os_malloc(dbenv, sizeof(DB_LOG_STAT), db_malloc, &sp);
+	if (ret != 0)
+		return (ret);
+
+	/* Snapshot everything while holding the region lock. */
+	R_LOCK(dbenv, &logp->reginfo);
+
+	/* Start from the counters kept in the region ... */
+	*sp = lp->stat;
+
+	/* ... add the persistent header and buffer configuration ... */
+	sp->st_magic = lp->persist.magic;
+	sp->st_version = lp->persist.version;
+	sp->st_mode = lp->persist.mode;
+	sp->st_lg_bsize = lp->buffer_size;
+	sp->st_lg_max = lp->persist.lg_max;
+
+	/* ... the region mutex counters and region size ... */
+	sp->st_region_wait = logp->reginfo.rp->mutex.mutex_set_wait;
+	sp->st_region_nowait = logp->reginfo.rp->mutex.mutex_set_nowait;
+	sp->st_regsize = logp->reginfo.rp->size;
+
+	/* ... and the current end-of-log position. */
+	sp->st_cur_file = lp->lsn.file;
+	sp->st_cur_offset = lp->lsn.offset;
+
+	R_UNLOCK(dbenv, &logp->reginfo);
+
+	*statp = sp;
+	return (0);
+}
+
+/*
+ * __log_lastckp --
+ *	Return the current chkpt_lsn, so that we can store it in
+ *	the transaction region and keep the chain of checkpoints
+ *	unbroken across environment recreates.
+ *
+ *	Copies the region's chkpt_lsn into *lsnp; always returns 0.
+ *
+ * PUBLIC: int __log_lastckp __P((DB_ENV *, DB_LSN *));
+ */
+int
+__log_lastckp(dbenv, lsnp)
+	DB_ENV *dbenv;
+	DB_LSN *lsnp;
+{
+	DB_LOG *logp;
+	LOG *region;
+
+	logp = (DB_LOG *)dbenv->lg_handle;
+	region = (LOG *)logp->reginfo.primary;
+
+	*lsnp = region->chkpt_lsn;
+	return (0);
+}
diff --git a/bdb/log/log.src b/bdb/log/log.src
new file mode 100644
index 00000000000..a92fae8de26
--- /dev/null
+++ b/bdb/log/log.src
@@ -0,0 +1,46 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ *
+ * $Id: log.src,v 10.12 2000/02/17 20:24:10 bostic Exp $
+ */
+
+PREFIX log
+
+INCLUDE #include "db_config.h"
+INCLUDE
+INCLUDE #ifndef NO_SYSTEM_INCLUDES
+INCLUDE #include <sys/types.h>
+INCLUDE
+INCLUDE #include <ctype.h>
+INCLUDE #include <errno.h>
+INCLUDE #include <string.h>
+INCLUDE #endif
+INCLUDE
+INCLUDE #include "db_int.h"
+INCLUDE #include "db_page.h"
+INCLUDE #include "db_dispatch.h"
+INCLUDE #include "db_am.h"
+INCLUDE #include "log.h"
+INCLUDE #include "txn.h"
+INCLUDE
+
+/* Used for registering name/id translations at open or close. */
+DEPRECATED register1 1
+ARG opcode u_int32_t lu
+DBT name DBT s
+DBT uid DBT s
+ARG fileid int32_t ld
+ARG ftype DBTYPE lx
+END
+
+BEGIN register 2
+ARG opcode u_int32_t lu
+DBT name DBT s
+DBT uid DBT s
+ARG fileid int32_t ld
+ARG ftype DBTYPE lx
+ARG meta_pgno db_pgno_t lu
+END
diff --git a/bdb/log/log_archive.c b/bdb/log/log_archive.c
new file mode 100644
index 00000000000..83728c79e55
--- /dev/null
+++ b/bdb/log/log_archive.c
@@ -0,0 +1,447 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_archive.c,v 11.13 2000/11/30 00:58:40 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_dispatch.h"
+#include "log.h"
+#include "clib_ext.h" /* XXX: needed for getcwd. */
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static int __absname __P((DB_ENV *, char *, char *, char **));
+static int __build_data __P((DB_ENV *, char *, char ***, void *(*)(size_t)));
+static int __cmpfunc __P((const void *, const void *));
+static int __usermem __P((DB_ENV *, char ***, void *(*)(size_t)));
+
+/*
+ * log_archive --
+ *	Supporting function for db_archive(1).
+ *
+ *	Builds a NULL-terminated, sorted list of file names and returns it
+ *	in *listp as a single chunk obtained through db_malloc (see
+ *	__usermem). *listp is set to NULL when there is nothing to return.
+ *
+ *	flags selects the list:
+ *	    DB_ARCH_DATA  database files named in the log (__build_data);
+ *	    DB_ARCH_LOG   all log files up to the file holding the last
+ *	                  log record;
+ *	    0             log files before the file holding the LSN
+ *	                  returned by __log_findckp.
+ *	DB_ARCH_ABS may be or'd in to get absolute path names.
+ *
+ *	Returns 0 on success, otherwise an error code.
+ */
+int
+log_archive(dbenv, listp, flags, db_malloc)
+	DB_ENV *dbenv;
+	char ***listp;
+	u_int32_t flags;
+	void *(*db_malloc) __P((size_t));
+{
+	DBT rec;
+	DB_LOG *dblp;
+	DB_LSN stable_lsn;
+	u_int32_t fnum;
+	int array_size, n, ret;
+	char **array, **arrayp, *name, *p, *pref, buf[MAXPATHLEN];
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_archive(dbenv, listp, flags, db_malloc));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	name = NULL;
+	dblp = dbenv->lg_handle;
+	COMPQUIET(fnum, 0);
+
+#define	OKFLAGS	(DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG)
+	if (flags != 0) {
+		if ((ret =
+		    __db_fchk(dbenv, "log_archive", flags, OKFLAGS)) != 0)
+			return (ret);
+		/* DB_ARCH_DATA and DB_ARCH_LOG are mutually exclusive. */
+		if ((ret =
+		    __db_fcchk(dbenv,
+			"log_archive", flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * Get the absolute pathname of the current directory.  It would
+	 * be nice to get the shortest pathname of the database directory,
+	 * but that's just not possible.
+	 *
+	 * XXX
+	 * Can't trust getcwd(3) to set a valid errno.  If it doesn't, just
+	 * guess that we ran out of memory.
+	 */
+	if (LF_ISSET(DB_ARCH_ABS)) {
+		__os_set_errno(0);
+		if ((pref = getcwd(buf, sizeof(buf))) == NULL) {
+			if (__os_get_errno() == 0)
+				__os_set_errno(ENOMEM);
+			return (__os_get_errno());
+		}
+	} else
+		pref = NULL;
+
+	/* Work out the highest log file number to consider. */
+	switch (LF_ISSET(~DB_ARCH_ABS)) {
+	case DB_ARCH_DATA:
+		return (__build_data(dbenv, pref, listp, db_malloc));
+	case DB_ARCH_LOG:
+		/* Only the LSN of the last record is wanted, not its data. */
+		memset(&rec, 0, sizeof(rec));
+		if (F_ISSET(dbenv, DB_ENV_THREAD))
+			F_SET(&rec, DB_DBT_MALLOC);
+		if ((ret = log_get(dbenv, &stable_lsn, &rec, DB_LAST)) != 0)
+			return (ret);
+		if (F_ISSET(dbenv, DB_ENV_THREAD))
+			__os_free(rec.data, rec.size);
+		fnum = stable_lsn.file;
+		break;
+	case 0:
+		if ((ret = __log_findckp(dbenv, &stable_lsn)) != 0) {
+			/*
+			 * A return of DB_NOTFOUND means that we didn't find
+			 * any records in the log (so we are not going to be
+			 * deleting any log files).
+			 */
+			if (ret != DB_NOTFOUND)
+				return (ret);
+			*listp = NULL;
+			return (0);
+		}
+		/* Remove any log files before the last stable LSN. */
+		fnum = stable_lsn.file - 1;
+		break;
+	}
+
+	/* LIST_INCREMENT is also used by __build_data further down. */
+#define	LIST_INCREMENT	64
+	/* Get some initial space. */
+	array_size = 10;
+	if ((ret = __os_malloc(dbenv,
+	    sizeof(char *) * array_size, NULL, &array)) != 0)
+		return (ret);
+	array[0] = NULL;
+
+	/* Build an array of the file names, newest file first. */
+	for (n = 0; fnum > 0; --fnum) {
+		if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0)
+			goto err;
+		if (__os_exists(name, NULL) != 0) {
+			/*
+			 * The file is missing.  Release its name on both
+			 * paths: the next __log_name call overwrites the
+			 * pointer, so skipping the free on "continue"
+			 * would leak it.
+			 */
+			__os_freestr(name);
+			name = NULL;
+
+			/*
+			 * With DB_ARCH_LOG a missing newest file is
+			 * tolerated and older files are still tried; any
+			 * other gap ends the list.
+			 */
+			if (LF_ISSET(DB_ARCH_LOG) && fnum == stable_lsn.file)
+				continue;
+			break;
+		}
+
+		/* Keep room for this entry plus the NULL terminator. */
+		if (n >= array_size - 1) {
+			array_size += LIST_INCREMENT;
+			if ((ret = __os_realloc(dbenv,
+			    sizeof(char *) * array_size, NULL, &array)) != 0)
+				goto err;
+		}
+
+		/*
+		 * Store the name in the requested form: absolute, the part
+		 * after the position __db_rpath returns, or as is.
+		 */
+		if (LF_ISSET(DB_ARCH_ABS)) {
+			if ((ret = __absname(dbenv,
+			    pref, name, &array[n])) != 0)
+				goto err;
+			__os_freestr(name);
+		} else if ((p = __db_rpath(name)) != NULL) {
+			if ((ret = __os_strdup(dbenv, p + 1, &array[n])) != 0)
+				goto err;
+			__os_freestr(name);
+		} else
+			array[n] = name;
+
+		/* The array owns the string now (or it has been freed). */
+		name = NULL;
+		array[++n] = NULL;
+	}
+
+	/* If there's nothing to return, we're done. */
+	if (n == 0) {
+		*listp = NULL;
+		ret = 0;
+		goto err;
+	}
+
+	/* Sort the list. */
+	qsort(array, (size_t)n, sizeof(char *), __cmpfunc);
+
+	/* Rework the memory. */
+	if ((ret = __usermem(dbenv, &array, db_malloc)) != 0)
+		goto err;
+
+	*listp = array;
+	return (0);
+
+	/* Shared cleanup: also used, with ret == 0, for the empty list. */
+err:	if (array != NULL) {
+		for (arrayp = array; *arrayp != NULL; ++arrayp)
+			__os_freestr(*arrayp);
+		__os_free(array, sizeof(char *) * array_size);
+	}
+	if (name != NULL)
+		__os_freestr(name);
+	return (ret);
+}
+
+/*
+ * __build_data --
+ *	Build a list of datafiles for return.
+ *
+ *	Scans the whole log for DB_log_register records and collects the
+ *	file names they carry.  The names are sorted, duplicates and files
+ *	that no longer exist are dropped, and each survivor is rewritten
+ *	either as an absolute path (pref != NULL) or as the part of its
+ *	real path after the position __db_rpath returns.  The result is
+ *	handed back in *listp as one chunk allocated through db_malloc, or
+ *	as NULL if no register record was found.
+ *
+ *	Returns 0 on success, otherwise an error code.
+ *
+ *	NOTE(review): the scan loop ends on any non-zero log_get return and
+ *	that value is not examined afterwards, so a real read error is
+ *	treated like end-of-log -- confirm this is intended.
+ */
+static int
+__build_data(dbenv, pref, listp, db_malloc)
+	DB_ENV *dbenv;
+	char *pref, ***listp;
+	void *(*db_malloc) __P((size_t));
+{
+	DBT rec;
+	DB_LSN lsn;
+	__log_register_args *argp;
+	u_int32_t rectype;
+	int array_size, last, n, nxt, ret;
+	char **array, **arrayp, *p, *real_name;
+
+	/* Get some initial space. */
+	array_size = 10;
+	if ((ret = __os_malloc(dbenv,
+	    sizeof(char *) * array_size, NULL, &array)) != 0)
+		return (ret);
+	array[0] = NULL;
+
+	/* In a threaded environment each record is returned in fresh memory. */
+	memset(&rec, 0, sizeof(rec));
+	if (F_ISSET(dbenv, DB_ENV_THREAD))
+		F_SET(&rec, DB_DBT_MALLOC);
+	for (n = 0, ret = log_get(dbenv, &lsn, &rec, DB_FIRST);
+	    ret == 0; ret = log_get(dbenv, &lsn, &rec, DB_NEXT)) {
+		if (rec.size < sizeof(rectype)) {
+			ret = EINVAL;
+			__db_err(dbenv, "log_archive: bad log record");
+			goto lg_free;
+		}
+
+		/* Only file-registration records are of interest. */
+		memcpy(&rectype, rec.data, sizeof(rectype));
+		if (rectype != DB_log_register) {
+			if (F_ISSET(dbenv, DB_ENV_THREAD)) {
+				__os_free(rec.data, rec.size);
+				rec.data = NULL;
+			}
+			continue;
+		}
+		if ((ret = __log_register_read(dbenv, rec.data, &argp)) != 0) {
+			ret = EINVAL;
+			__db_err(dbenv,
+			    "log_archive: unable to read log record");
+			goto lg_free;
+		}
+
+		/*
+		 * From here on argp is owned by this loop iteration, so
+		 * every failure below has to release it before bailing out.
+		 */
+		if (n >= array_size - 1) {
+			array_size += LIST_INCREMENT;
+			if ((ret = __os_realloc(dbenv,
+			    sizeof(char *) * array_size, NULL, &array)) != 0) {
+				__os_free(argp, 0);
+				goto lg_free;
+			}
+		}
+
+		if ((ret = __os_strdup(dbenv,
+		    argp->name.data, &array[n])) != 0) {
+			__os_free(argp, 0);
+			goto lg_free;
+		}
+
+		array[++n] = NULL;
+		__os_free(argp, 0);
+
+		if (F_ISSET(dbenv, DB_ENV_THREAD)) {
+			__os_free(rec.data, rec.size);
+			rec.data = NULL;
+		}
+	}
+
+	/* If there's nothing to return, we're done. */
+	if (n == 0) {
+		ret = 0;
+		*listp = NULL;
+		goto err1;
+	}
+
+	/* Sort the list. */
+	qsort(array, (size_t)n, sizeof(char *), __cmpfunc);
+
+	/*
+	 * Build the real pathnames, discarding nonexistent files and
+	 * duplicates.
+	 */
+	for (last = nxt = 0; nxt < n;) {
+		/*
+		 * Discard duplicates.  Last is the next slot we're going
+		 * to return to the user, nxt is the next slot that we're
+		 * going to consider.  Every slot strictly between last and
+		 * nxt is NULL.
+		 */
+		if (last != nxt) {
+			array[last] = array[nxt];
+			array[nxt] = NULL;
+		}
+		for (++nxt; nxt < n &&
+		    strcmp(array[last], array[nxt]) == 0; ++nxt) {
+			__os_freestr(array[nxt]);
+			array[nxt] = NULL;
+		}
+
+		/* Get the real name. */
+		if ((ret = __db_appname(dbenv,
+		    DB_APP_DATA, NULL, array[last], 0, NULL, &real_name)) != 0)
+			goto err2;
+
+		/* If the file doesn't exist, ignore it. */
+		if (__os_exists(real_name, NULL) != 0) {
+			__os_freestr(real_name);
+			__os_freestr(array[last]);
+			array[last] = NULL;
+			continue;
+		}
+
+		/* Rework the name as requested by the user. */
+		__os_freestr(array[last]);
+		array[last] = NULL;
+		if (pref != NULL) {
+			ret = __absname(dbenv, pref, real_name, &array[last]);
+			__os_freestr(real_name);
+			if (ret != 0)
+				goto err2;
+		} else if ((p = __db_rpath(real_name)) != NULL) {
+			ret = __os_strdup(dbenv, p + 1, &array[last]);
+			__os_freestr(real_name);
+			if (ret != 0)
+				goto err2;
+		} else
+			array[last] = real_name;
+		++last;
+	}
+
+	/* NULL-terminate the list. */
+	array[last] = NULL;
+
+	/* Rework the memory. */
+	if ((ret = __usermem(dbenv, &array, db_malloc)) != 0)
+		goto err1;
+
+	*listp = array;
+	return (0);
+
+	/* A failure inside the scan loop: drop the current record first. */
+lg_free:
+	if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL)
+		__os_free(rec.data, rec.size);
+	goto err1;
+
+err2:	/*
+	 * XXX
+	 * We've possibly inserted NULLs into the array list, so clean up a
+	 * bit so that the other error processing works.
+	 *
+	 * Free the entries not yet looked at and clear their slots.  The
+	 * walk below stops at the first NULL; without clearing, when slot
+	 * last is still occupied and nxt == last + 1 it would run straight
+	 * into these entries and free them a second time.
+	 */
+	if (array != NULL)
+		for (; nxt < n; ++nxt) {
+			__os_freestr(array[nxt]);
+			array[nxt] = NULL;
+		}
+	/* FALLTHROUGH */
+
+err1:	if (array != NULL) {
+		for (arrayp = array; *arrayp != NULL; ++arrayp)
+			__os_freestr(*arrayp);
+		__os_free(array, array_size * sizeof(char *));
+	}
+	return (ret);
+}
+
+/*
+ * __absname --
+ *	Return an absolute path name for the file.
+ *
+ *	If name is already absolute (per __os_abspath) the result is a copy
+ *	of name.  Otherwise it is pref followed by name, with the first
+ *	character of PATH_SEPARATOR inserted between them unless pref
+ *	already ends in a separator character.  An empty pref yields a
+ *	plain copy of name.
+ *
+ *	The result is allocated with __os_malloc and returned in *newnamep;
+ *	the caller owns it.  Returns 0 on success or the allocation error,
+ *	in which case *newnamep is not written.
+ */
+static int
+__absname(dbenv, pref, name, newnamep)
+	DB_ENV *dbenv;
+	char *pref, *name, **newnamep;
+{
+	size_t l_pref, l_name;
+	int isabspath, ret;
+	char *newname;
+
+	l_name = strlen(name);
+	isabspath = __os_abspath(name);
+	l_pref = isabspath ? 0 : strlen(pref);
+
+	/*
+	 * Malloc space for concatenating the two: prefix, one possible
+	 * separator, the name and its terminating nul.
+	 */
+	if ((ret = __os_malloc(dbenv,
+	    l_pref + l_name + 2, NULL, &newname)) != 0)
+		return (ret);
+	*newnamep = newname;
+
+	/* Build the name.  If `name' is an absolute path, ignore any prefix. */
+	if (!isabspath) {
+		memcpy(newname, pref, l_pref);
+		/*
+		 * Only look at the last prefix character if there is one;
+		 * with an empty prefix, newname[l_pref - 1] would read in
+		 * front of the buffer.
+		 */
+		if (l_pref > 0 &&
+		    strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL)
+			newname[l_pref++] = PATH_SEPARATOR[0];
+	}
+	memcpy(newname + l_pref, name, l_name + 1);
+
+	return (0);
+}
+
+/*
+ * __usermem --
+ *	Create a single chunk of memory that holds the returned information.
+ *	If the user has their own malloc routine, use it.
+ *
+ *	The chunk starts with the NULL-terminated pointer array, followed
+ *	by the strings the pointers refer to.  On success the strings and
+ *	the array passed in through *listp are freed and *listp is replaced
+ *	by the new chunk.  On failure *listp is left untouched.
+ */
+static int
+__usermem(dbenv, listp, db_malloc)
+	DB_ENV *dbenv;
+	char ***listp;
+	void *(*db_malloc) __P((size_t));
+{
+	size_t nbytes, nent, slen;
+	int ret;
+	char **newlist, **dst, **src, *cursor;
+
+	/* One pointer plus the nul-terminated text per entry ... */
+	nbytes = 0;
+	nent = 0;
+	for (src = *listp; *src != NULL; ++src) {
+		nbytes += sizeof(char *) + strlen(*src) + 1;
+		++nent;
+	}
+	/* ... and one more pointer for the terminating NULL. */
+	nbytes += sizeof(char *);
+
+	/* Allocate it and set up the pointers. */
+	if ((ret = __os_malloc(dbenv, nbytes, db_malloc, &newlist)) != 0)
+		return (ret);
+
+	/* The string area begins right behind the pointer array. */
+	cursor = (char *)(newlist + nent + 1);
+
+	/* Move each string over, releasing the original as we go. */
+	dst = newlist;
+	for (src = *listp; *src != NULL; ++src) {
+		slen = strlen(*src);
+		memcpy(cursor, *src, slen + 1);
+		*dst++ = cursor;
+		cursor += slen + 1;
+
+		__os_freestr(*src);
+	}
+
+	/* NULL-terminate the list. */
+	*dst = NULL;
+
+	/* Swap the caller's array for the new chunk. */
+	__os_free(*listp, 0);
+	*listp = newlist;
+
+	return (0);
+}
+
+/*
+ * __cmpfunc --
+ *	qsort(3) comparison function for an array of C strings: each
+ *	argument points at a char * element, and the strings are ordered
+ *	with strcmp.
+ */
+static int
+__cmpfunc(p1, p2)
+	const void *p1, *p2;
+{
+	char * const *lhs;
+	char * const *rhs;
+
+	lhs = p1;
+	rhs = p2;
+	return (strcmp(*lhs, *rhs));
+}
diff --git a/bdb/log/log_auto.c b/bdb/log/log_auto.c
new file mode 100644
index 00000000000..281296cc238
--- /dev/null
+++ b/bdb/log/log_auto.c
@@ -0,0 +1,326 @@
+/* Do not edit: automatically built by gen_rec.awk. */
+#include "db_config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "log.h"
+#include "txn.h"
+
+/*
+ * __log_register1_print --
+ *	Decode a log_register1 record from dbtp and write a human-readable
+ *	dump of it to stdout.  Bytes of the name and uid fields are printed
+ *	as-is when printable (or newline) and as hex otherwise.  Returns 0,
+ *	or the error from __log_register1_read.
+ */
+int
+__log_register1_print(dbenv, dbtp, lsnp, notused2, notused3)
+	DB_ENV *dbenv;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *notused3;
+{
+	__log_register1_args *argp;
+	const u_int8_t *p, *endp;
+	u_int ch;
+	int ret;
+
+	/* The last two arguments are unused; assign them to quiet compilers. */
+	notused2 = DB_TXN_ABORT;
+	notused3 = NULL;
+
+	ret = __log_register1_read(dbenv, dbtp->data, &argp);
+	if (ret != 0)
+		return (ret);
+
+	printf("[%lu][%lu]log_register1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)argp->type,
+	    (u_long)argp->txnid->txnid,
+	    (u_long)argp->prev_lsn.file,
+	    (u_long)argp->prev_lsn.offset);
+	printf("\topcode: %lu\n", (u_long)argp->opcode);
+
+	printf("\tname: ");
+	p = argp->name.data;
+	for (endp = p + argp->name.size; p < endp; ++p) {
+		ch = *p;
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
+		else
+			printf("%#x ", ch);
+	}
+	printf("\n");
+
+	printf("\tuid: ");
+	p = argp->uid.data;
+	for (endp = p + argp->uid.size; p < endp; ++p) {
+		ch = *p;
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
+		else
+			printf("%#x ", ch);
+	}
+	printf("\n");
+
+	printf("\tfileid: %ld\n", (long)argp->fileid);
+	printf("\tftype: 0x%lx\n", (u_long)argp->ftype);
+	printf("\n");
+
+	__os_free(argp, 0);
+	return (0);
+}
+
+/*
+ * __log_register1_read --
+ * Decode a marshalled log_register1 record from recbuf into a newly
+ * allocated __log_register1_args, returned through argpp.
+ *
+ * The name and uid DBTs are not copied: their data pointers reference
+ * bytes inside recbuf, so recbuf has to stay valid while *argpp is used.
+ * The field sizes are taken from the record as-is; no buffer length is
+ * passed in, so nothing here checks them against the end of recbuf.
+ *
+ * Returns 0 on success, or the error from __os_malloc.
+ */
+int
+__log_register1_read(dbenv, recbuf, argpp)
+ DB_ENV *dbenv;
+ void *recbuf;
+ __log_register1_args **argpp;
+{
+ __log_register1_args *argp;
+ u_int8_t *bp;
+ int ret;
+
+ /* One allocation holds the args followed by the DB_TXN for txnid. */
+ ret = __os_malloc(dbenv, sizeof(__log_register1_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
+ argp->txnid = (DB_TXN *)&argp[1];
+ /* Walk the record; each field is copied out and bp steps past it. */
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
+ /* name: 32-bit size, then the bytes (referenced in place). */
+ memset(&argp->name, 0, sizeof(argp->name));
+ memcpy(&argp->name.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->name.data = bp;
+ bp += argp->name.size;
+ /* uid: same layout as name. */
+ memset(&argp->uid, 0, sizeof(argp->uid));
+ memcpy(&argp->uid.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->uid.data = bp;
+ bp += argp->uid.size;
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->ftype, bp, sizeof(argp->ftype));
+ bp += sizeof(argp->ftype);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * __log_register_log --
+ *	Marshal a log_register record and write it with __log_put.
+ *
+ *	Record layout, in order: record type, transaction id, previous LSN of
+ *	the transaction, opcode, name (32-bit size + bytes), uid (32-bit size
+ *	+ bytes), fileid, ftype, meta_pgno.  A NULL name or uid is written as
+ *	a zero size with no bytes.
+ *
+ *	txnid may be NULL, in which case the transaction id is 0 and the
+ *	previous LSN is zero.  If txnid has children, __txn_activekids is
+ *	consulted first and its error, if any, is returned.
+ *
+ *	On success the new record's LSN is returned through ret_lsnp and, for
+ *	a transactional write, stored in txnid->last_lsn.  On failure
+ *	txnid->last_lsn is left alone: *ret_lsnp is not known to be valid
+ *	when __log_put fails, and the transaction's record chain must keep
+ *	pointing at the last record that was actually written.
+ *
+ *	Returns 0 on success, or the error from __txn_activekids, __os_malloc
+ *	or __log_put.
+ */
+int
+__log_register_log(dbenv, txnid, ret_lsnp, flags,
+    opcode, name, uid, fileid, ftype, meta_pgno)
+	DB_ENV *dbenv;
+	DB_TXN *txnid;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t opcode;
+	const DBT *name;
+	const DBT *uid;
+	int32_t fileid;
+	DBTYPE ftype;
+	db_pgno_t meta_pgno;
+{
+	DBT logrec;
+	DB_LSN *lsnp, null_lsn;
+	u_int32_t zero;
+	u_int32_t rectype, txn_num;
+	int ret;
+	u_int8_t *bp;
+
+	rectype = DB_log_register;
+	if (txnid != NULL &&
+	    TAILQ_FIRST(&txnid->kids) != NULL &&
+	    (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+		return (ret);
+	txn_num = txnid == NULL ? 0 : txnid->txnid;
+	if (txnid == NULL) {
+		ZERO_LSN(null_lsn);
+		lsnp = &null_lsn;
+	} else
+		lsnp = &txnid->last_lsn;
+
+	/* The size must match, byte for byte, what is marshalled below. */
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(opcode)
+	    + sizeof(u_int32_t) + (name == NULL ? 0 : name->size)
+	    + sizeof(u_int32_t) + (uid == NULL ? 0 : uid->size)
+	    + sizeof(fileid)
+	    + sizeof(ftype)
+	    + sizeof(meta_pgno);
+	if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
+		return (ret);
+
+	bp = logrec.data;
+	memcpy(bp, &rectype, sizeof(rectype));
+	bp += sizeof(rectype);
+	memcpy(bp, &txn_num, sizeof(txn_num));
+	bp += sizeof(txn_num);
+	memcpy(bp, lsnp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(bp, &opcode, sizeof(opcode));
+	bp += sizeof(opcode);
+	if (name == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &name->size, sizeof(name->size));
+		bp += sizeof(name->size);
+		memcpy(bp, name->data, name->size);
+		bp += name->size;
+	}
+	if (uid == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &uid->size, sizeof(uid->size));
+		bp += sizeof(uid->size);
+		memcpy(bp, uid->data, uid->size);
+		bp += uid->size;
+	}
+	memcpy(bp, &fileid, sizeof(fileid));
+	bp += sizeof(fileid);
+	memcpy(bp, &ftype, sizeof(ftype));
+	bp += sizeof(ftype);
+	memcpy(bp, &meta_pgno, sizeof(meta_pgno));
+	bp += sizeof(meta_pgno);
+	DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
+
+	ret = __log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
+
+	/* Only advance the transaction's LSN if the record was written. */
+	if (txnid != NULL && ret == 0)
+		txnid->last_lsn = *ret_lsnp;
+	__os_free(logrec.data, logrec.size);
+	return (ret);
+}
+
+/*
+ * __log_register_print --
+ *	Decode a log_register record from dbtp and write a human-readable
+ *	dump of it to stdout.  Bytes of the name and uid fields are printed
+ *	as-is when printable (or newline) and as hex otherwise.  Returns 0,
+ *	or the error from __log_register_read.
+ */
+int
+__log_register_print(dbenv, dbtp, lsnp, notused2, notused3)
+	DB_ENV *dbenv;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *notused3;
+{
+	__log_register_args *argp;
+	const u_int8_t *p, *endp;
+	u_int ch;
+	int ret;
+
+	/* The last two arguments are unused; assign them to quiet compilers. */
+	notused2 = DB_TXN_ABORT;
+	notused3 = NULL;
+
+	ret = __log_register_read(dbenv, dbtp->data, &argp);
+	if (ret != 0)
+		return (ret);
+
+	printf("[%lu][%lu]log_register: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)argp->type,
+	    (u_long)argp->txnid->txnid,
+	    (u_long)argp->prev_lsn.file,
+	    (u_long)argp->prev_lsn.offset);
+	printf("\topcode: %lu\n", (u_long)argp->opcode);
+
+	printf("\tname: ");
+	p = argp->name.data;
+	for (endp = p + argp->name.size; p < endp; ++p) {
+		ch = *p;
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
+		else
+			printf("%#x ", ch);
+	}
+	printf("\n");
+
+	printf("\tuid: ");
+	p = argp->uid.data;
+	for (endp = p + argp->uid.size; p < endp; ++p) {
+		ch = *p;
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
+		else
+			printf("%#x ", ch);
+	}
+	printf("\n");
+
+	printf("\tfileid: %ld\n", (long)argp->fileid);
+	printf("\tftype: 0x%lx\n", (u_long)argp->ftype);
+	printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
+	printf("\n");
+
+	__os_free(argp, 0);
+	return (0);
+}
+
+/*
+ * __log_register_read --
+ * Decode a marshalled log_register record from recbuf into a newly
+ * allocated __log_register_args, returned through argpp.
+ *
+ * The name and uid DBTs are not copied: their data pointers reference
+ * bytes inside recbuf, so recbuf has to stay valid while *argpp is used.
+ * The field sizes are taken from the record as-is; no buffer length is
+ * passed in, so nothing here checks them against the end of recbuf.
+ *
+ * Returns 0 on success, or the error from __os_malloc.
+ */
+int
+__log_register_read(dbenv, recbuf, argpp)
+ DB_ENV *dbenv;
+ void *recbuf;
+ __log_register_args **argpp;
+{
+ __log_register_args *argp;
+ u_int8_t *bp;
+ int ret;
+
+ /* One allocation holds the args followed by the DB_TXN for txnid. */
+ ret = __os_malloc(dbenv, sizeof(__log_register_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
+ argp->txnid = (DB_TXN *)&argp[1];
+ /* Walk the record; each field is copied out and bp steps past it. */
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
+ /* name: 32-bit size, then the bytes (referenced in place). */
+ memset(&argp->name, 0, sizeof(argp->name));
+ memcpy(&argp->name.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->name.data = bp;
+ bp += argp->name.size;
+ /* uid: same layout as name. */
+ memset(&argp->uid, 0, sizeof(argp->uid));
+ memcpy(&argp->uid.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->uid.data = bp;
+ bp += argp->uid.size;
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->ftype, bp, sizeof(argp->ftype));
+ bp += sizeof(argp->ftype);
+ memcpy(&argp->meta_pgno, bp, sizeof(argp->meta_pgno));
+ bp += sizeof(argp->meta_pgno);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * __log_init_print --
+ *	Register the print routines for both log_register record types with
+ *	__db_add_recovery.  Stops at, and returns, the first error.
+ */
+int
+__log_init_print(dbenv)
+	DB_ENV *dbenv;
+{
+	int ret;
+
+	ret = __db_add_recovery(dbenv,
+	    __log_register1_print, DB_log_register1);
+	if (ret == 0)
+		ret = __db_add_recovery(dbenv,
+		    __log_register_print, DB_log_register);
+	return (ret);
+}
+
+/*
+ * __log_init_recover --
+ *	Register the recovery routines for the log_register record types with
+ *	__db_add_recovery: __deprecated_recover for DB_log_register1 and
+ *	__log_register_recover for DB_log_register.  Stops at, and returns,
+ *	the first error.
+ */
+int
+__log_init_recover(dbenv)
+	DB_ENV *dbenv;
+{
+	int ret;
+
+	ret = __db_add_recovery(dbenv,
+	    __deprecated_recover, DB_log_register1);
+	if (ret == 0)
+		ret = __db_add_recovery(dbenv,
+		    __log_register_recover, DB_log_register);
+	return (ret);
+}
+
diff --git a/bdb/log/log_compare.c b/bdb/log/log_compare.c
new file mode 100644
index 00000000000..9bc3c028a5f
--- /dev/null
+++ b/bdb/log/log_compare.c
@@ -0,0 +1,34 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_compare.c,v 11.3 2000/02/14 02:59:59 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * log_compare --
+ *	Order two LSNs, by file number first and by offset within the file
+ *	second.  Returns -1, 0 or 1 as lsn0 is less than, equal to or greater
+ *	than lsn1.
+ */
+int
+log_compare(lsn0, lsn1)
+	const DB_LSN *lsn0, *lsn1;
+{
+	/* The file number is the major key. */
+	if (lsn0->file < lsn1->file)
+		return (-1);
+	if (lsn0->file > lsn1->file)
+		return (1);
+
+	/* Same file: the offset decides. */
+	if (lsn0->offset < lsn1->offset)
+		return (-1);
+	if (lsn0->offset > lsn1->offset)
+		return (1);
+
+	return (0);
+}
diff --git a/bdb/log/log_findckp.c b/bdb/log/log_findckp.c
new file mode 100644
index 00000000000..b1e8fddbdb7
--- /dev/null
+++ b/bdb/log/log_findckp.c
@@ -0,0 +1,135 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_findckp.c,v 11.5 2000/11/30 00:58:40 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "log.h"
+#include "txn.h"
+
+/*
+ * __log_findckp --
+ *
+ * Looks for the most recent checkpoint that occurs before the most recent
+ * checkpoint LSN, subject to the constraint that there must be at least two
+ * checkpoints. The reason you need two checkpoints is that you might have
+ * crashed during the most recent one and may not have a copy of all the
+ * open files. This is the point from which recovery can start and the
+ * point up to which archival/truncation can take place. Checkpoints in
+ * the log look like:
+ *
+ * -------------------------------------------------------------------
+ * | ckp A, ckplsn 100 | .... record .... | ckp B, ckplsn 600 | ...
+ * -------------------------------------------------------------------
+ * LSN 500 LSN 1000
+ *
+ * If we read the record that log_get returns for the DB_CHECKPOINT flag,
+ * we'll get the record at LSN 1000. The checkpoint LSN there is 600.
+ * Now we have to scan backwards looking for a checkpoint before LSN 600.
+ * We find one at 500. This means that we can truncate the log before
+ * 500 or run recovery beginning at 500.
+ *
+ * Returns 0 if we find a suitable checkpoint or we retrieved the first
+ * record in the log from which to start. Returns DB_NOTFOUND if there
+ * are no log records, errno on error.
+ *
+ * PUBLIC: int __log_findckp __P((DB_ENV *, DB_LSN *));
+ */
+int
+__log_findckp(dbenv, lsnp)
+ DB_ENV *dbenv;
+ DB_LSN *lsnp;
+{
+ DBT data;
+ DB_LSN ckp_lsn, final_ckp, last_ckp, next_lsn;
+ __txn_ckp_args *ckp_args;
+ int ret;
+
+ /*
+ * Need to find the appropriate point from which to begin
+ * recovery.
+ */
+ memset(&data, 0, sizeof(data));
+ /*
+ * log_get rejects a DBT without a memory-management flag when the
+ * environment is threaded, so ask for malloc'd records there. Every
+ * record obtained that way is freed below.
+ */
+ if (F_ISSET(dbenv, DB_ENV_THREAD))
+ F_SET(&data, DB_DBT_MALLOC);
+ ZERO_LSN(ckp_lsn);
+ /*
+ * Start from the most recent checkpoint record. ENOENT means no
+ * checkpoint LSN is recorded; fall back to the first log record.
+ */
+ if ((ret = log_get(dbenv, &last_ckp, &data, DB_CHECKPOINT)) != 0) {
+ if (ret == ENOENT)
+ goto get_first;
+ else
+ return (ret);
+ }
+ /* final_ckp remembers where the newest checkpoint record lives. */
+ final_ckp = last_ckp;
+
+ next_lsn = last_ckp;
+ do {
+ /* Release the record fetched by the previous log_get. */
+ if (F_ISSET(dbenv, DB_ENV_THREAD))
+ __os_free(data.data, data.size);
+
+ if ((ret = log_get(dbenv, &next_lsn, &data, DB_SET)) != 0)
+ return (ret);
+ if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) {
+ if (F_ISSET(dbenv, DB_ENV_THREAD))
+ __os_free(data.data, data.size);
+ return (ret);
+ }
+ /* Only the newest checkpoint's ckp_lsn is kept as the target. */
+ if (IS_ZERO_LSN(ckp_lsn))
+ ckp_lsn = ckp_args->ckp_lsn;
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_CHKPOINT)) {
+ __db_err(dbenv, "Checkpoint at: [%lu][%lu]",
+ (u_long)last_ckp.file, (u_long)last_ckp.offset);
+ __db_err(dbenv, "Checkpoint LSN: [%lu][%lu]",
+ (u_long)ckp_args->ckp_lsn.file,
+ (u_long)ckp_args->ckp_lsn.offset);
+ __db_err(dbenv, "Previous checkpoint: [%lu][%lu]",
+ (u_long)ckp_args->last_ckp.file,
+ (u_long)ckp_args->last_ckp.offset);
+ }
+ /*
+ * last_ckp becomes the checkpoint just read; next_lsn steps to
+ * the checkpoint that preceded it in the log.
+ */
+ last_ckp = next_lsn;
+ next_lsn = ckp_args->last_ckp;
+ __os_free(ckp_args, sizeof(*ckp_args));
+
+ /*
+ * Keep looping until either you 1) run out of checkpoints,
+ * 2) you've found a checkpoint before the most recent
+ * checkpoint's LSN and you have at least 2 checkpoints.
+ */
+ } while (!IS_ZERO_LSN(next_lsn) &&
+ (log_compare(&last_ckp, &ckp_lsn) > 0 ||
+ log_compare(&final_ckp, &last_ckp) == 0));
+
+ /* Release the last record read inside the loop. */
+ if (F_ISSET(dbenv, DB_ENV_THREAD))
+ __os_free(data.data, data.size);
+
+ /*
+ * At this point, either, next_lsn is ZERO or ckp_lsn is the
+ * checkpoint lsn and last_ckp is the LSN of the last checkpoint
+ * before ckp_lsn. If the compare in the loop is still true, then
+ * next_lsn must be 0 and we need to roll forward from the
+ * beginning of the log.
+ */
+ if (log_compare(&last_ckp, &ckp_lsn) >= 0 ||
+ log_compare(&final_ckp, &last_ckp) == 0) {
+ /* Only the LSN of the first record is wanted; its data is dropped. */
+get_first: if ((ret = log_get(dbenv, &last_ckp, &data, DB_FIRST)) != 0)
+ return (ret);
+ if (F_ISSET(dbenv, DB_ENV_THREAD))
+ __os_free(data.data, data.size);
+ }
+ /* *lsnp is stored even when DB_NOTFOUND is returned below. */
+ *lsnp = last_ckp;
+
+ return (IS_ZERO_LSN(last_ckp) ? DB_NOTFOUND : 0);
+}
diff --git a/bdb/log/log_get.c b/bdb/log/log_get.c
new file mode 100644
index 00000000000..b75d50a62fd
--- /dev/null
+++ b/bdb/log/log_get.c
@@ -0,0 +1,465 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_get.c,v 11.32 2001/01/11 18:19:53 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "log.h"
+#include "hash.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+/*
+ * log_get --
+ * Get a log record.
+ *
+ * Validates flags, takes the log region lock and calls __log_get. If
+ * the record found sits at offset 0 of a log file (one of the log's
+ * header records) the lookup is repeated, with DB_FIRST turned into
+ * DB_NEXT and DB_LAST into DB_PREV. If the final result is an error,
+ * *alsn is restored to the value it had on entry.
+ *
+ * With DB_ENV_THREAD set, DB_NEXT, DB_PREV and DB_CURRENT are rejected
+ * and dbt must carry DB_DBT_MALLOC, DB_DBT_REALLOC or DB_DBT_USERMEM.
+ *
+ * Returns 0 on success, otherwise the error from __db_ferr or __log_get.
+ */
+int
+log_get(dbenv, alsn, dbt, flags)
+ DB_ENV *dbenv;
+ DB_LSN *alsn;
+ DBT *dbt;
+ u_int32_t flags;
+{
+ DB_LOG *dblp;
+ DB_LSN saved_lsn;
+ int ret;
+
+#ifdef HAVE_RPC
+ /* RPC client environments hand the whole call to the client stub. */
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_log_get(dbenv, alsn, dbt, flags));
+#endif
+
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+ /* Validate arguments. */
+ if (flags != DB_CHECKPOINT && flags != DB_CURRENT &&
+ flags != DB_FIRST && flags != DB_LAST &&
+ flags != DB_NEXT && flags != DB_PREV && flags != DB_SET)
+ return (__db_ferr(dbenv, "log_get", 1));
+
+ if (F_ISSET(dbenv, DB_ENV_THREAD)) {
+ if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT)
+ return (__db_ferr(dbenv, "log_get", 1));
+ if (!F_ISSET(dbt,
+ DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERMEM))
+ return (__db_ferr(dbenv, "threaded data", 1));
+ }
+
+ dblp = dbenv->lg_handle;
+ R_LOCK(dbenv, &dblp->reginfo);
+
+ /*
+ * The alsn field is only initialized if DB_SET is the flag, so this
+ * assignment causes uninitialized memory complaints for other flag
+ * values.
+ */
+#ifdef UMRW
+ if (flags == DB_SET)
+ saved_lsn = *alsn;
+ else
+ ZERO_LSN(saved_lsn);
+#else
+ saved_lsn = *alsn;
+#endif
+
+ /*
+ * If we get one of the log's header records, repeat the operation.
+ * This assumes that applications don't ever request the log header
+ * records by LSN, but that seems reasonable to me.
+ */
+ if ((ret = __log_get(dblp,
+ alsn, dbt, flags, 0)) == 0 && alsn->offset == 0) {
+ /* Other flag values are retried unchanged. */
+ switch (flags) {
+ case DB_FIRST:
+ flags = DB_NEXT;
+ break;
+ case DB_LAST:
+ flags = DB_PREV;
+ break;
+ }
+ /* Drop the header record's malloc'd copy before the retry. */
+ if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+ __os_free(dbt->data, dbt->size);
+ dbt->data = NULL;
+ }
+ ret = __log_get(dblp, alsn, dbt, flags, 0);
+ }
+ /* Undo any change to the caller's LSN if the final lookup failed. */
+ if (ret != 0)
+ *alsn = saved_lsn;
+
+ R_UNLOCK(dbenv, &dblp->reginfo);
+
+ return (ret);
+}
+
+/*
+ * __log_get --
+ * Get a log record; internal version.
+ *
+ * Works out the target LSN from flags and the handle's cursor (c_lsn,
+ * c_len, c_off), then fetches the record from the in-memory log buffer,
+ * from the handle's read-ahead buffer (readbufp) or from the log file,
+ * copies it into dbt and verifies its checksum. On success the cursor
+ * is moved to the record and its LSN is stored in *alsn; *alsn is not
+ * written on failure.
+ *
+ * When silent is non-zero no error messages are issued.
+ *
+ * Returns 0 on success; DB_NOTFOUND when the request runs off either
+ * end of the log or no readable first log file exists; ENOENT for
+ * DB_CHECKPOINT when no checkpoint LSN is recorded; EIO when the
+ * record looks corrupt; otherwise the error of the failing call.
+ *
+ * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
+ */
+int
+__log_get(dblp, alsn, dbt, flags, silent)
+ DB_LOG *dblp;
+ DB_LSN *alsn;
+ DBT *dbt;
+ u_int32_t flags;
+ int silent;
+{
+ DB_ENV *dbenv;
+ DB_LSN nlsn;
+ HDR hdr;
+ LOG *lp;
+ const char *fail;
+ char *np, *tbuf;
+ int cnt, ret;
+ logfile_validity status;
+ size_t len, nr;
+ u_int32_t offset;
+ u_int8_t *p;
+ void *shortp, *readp;
+
+ lp = dblp->reginfo.primary;
+ fail = np = tbuf = NULL;
+ dbenv = dblp->dbenv;
+
+ /* nlsn is the LSN we are going to read; start from the cursor. */
+ nlsn = dblp->c_lsn;
+ switch (flags) {
+ case DB_CHECKPOINT:
+ nlsn = lp->chkpt_lsn;
+ if (IS_ZERO_LSN(nlsn)) {
+ /* No db_err. The caller may expect this. */
+ ret = ENOENT;
+ goto err2;
+ }
+ break;
+ case DB_NEXT: /* Next log record. */
+ if (!IS_ZERO_LSN(nlsn)) {
+ /* Increment the cursor by the cursor record size. */
+ nlsn.offset += dblp->c_len;
+ break;
+ }
+ /* FALLTHROUGH */
+ case DB_FIRST: /* Find the first log record. */
+ /* Find the first log file. */
+ if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
+ goto err2;
+
+ /*
+ * We want any readable version, so either DB_LV_NORMAL
+ * or DB_LV_OLD_READABLE is acceptable here. If it's
+ * not one of those two, there is no first log record that
+ * we can read.
+ */
+ if (status != DB_LV_NORMAL && status != DB_LV_OLD_READABLE) {
+ ret = DB_NOTFOUND;
+ goto err2;
+ }
+
+ /*
+ * We may have only entered records in the buffer, and not
+ * yet written a log file. If no log files were found and
+ * there's anything in the buffer, it belongs to file 1.
+ */
+ if (cnt == 0)
+ cnt = 1;
+
+ nlsn.file = cnt;
+ nlsn.offset = 0;
+ break;
+ case DB_CURRENT: /* Current log record. */
+ break;
+ case DB_PREV: /* Previous log record. */
+ if (!IS_ZERO_LSN(nlsn)) {
+ /* If at start-of-file, move to the previous file. */
+ if (nlsn.offset == 0) {
+ /*
+ * These returns bypass the cleanup labels; np
+ * and tbuf are still NULL here, so nothing
+ * needs freeing.
+ */
+ if (nlsn.file == 1 ||
+ __log_valid(dblp,
+ nlsn.file - 1, 0, &status) != 0)
+ return (DB_NOTFOUND);
+
+ if (status != DB_LV_NORMAL &&
+ status != DB_LV_OLD_READABLE)
+ return (DB_NOTFOUND);
+
+ --nlsn.file;
+ nlsn.offset = dblp->c_off;
+ } else
+ nlsn.offset = dblp->c_off;
+ break;
+ }
+ /* FALLTHROUGH */
+ case DB_LAST: /* Last log record. */
+ nlsn.file = lp->lsn.file;
+ nlsn.offset = lp->lsn.offset - lp->len;
+ break;
+ case DB_SET: /* Set log record. */
+ nlsn = *alsn;
+ break;
+ }
+
+ /* Entered only by the "goto next_file" statements further down. */
+ if (0) { /* Move to the next file. */
+next_file: ++nlsn.file;
+ nlsn.offset = 0;
+ }
+
+ /*
+ * Return DB_NOTFOUND if the request is past the end of the log.
+ * No allocation is outstanding here, on the first pass or after a
+ * jump to next_file, so returning directly leaks nothing.
+ */
+ if (nlsn.file > lp->lsn.file ||
+ (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
+ return (DB_NOTFOUND);
+
+ /* If we've switched files, discard the current file handle. */
+ if (dblp->c_lsn.file != nlsn.file &&
+ F_ISSET(&dblp->c_fh, DB_FH_VALID)) {
+ (void)__os_closehandle(&dblp->c_fh);
+ }
+
+ /* If the entire record is in the in-memory buffer, copy it out. */
+ if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
+ /* Copy the header. */
+ p = dblp->bufp + (nlsn.offset - lp->w_off);
+ memcpy(&hdr, p, sizeof(HDR));
+
+ /*
+ * Copy the record.
+ * NOTE(review): unlike the file paths below, hdr.len is not
+ * range-checked here before it is used -- presumably the
+ * buffer contents are trusted; confirm.
+ */
+ len = hdr.len - sizeof(HDR);
+ if ((ret = __db_retcopy(NULL, dbt, p + sizeof(HDR),
+ len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0)
+ goto err2;
+ goto cksum;
+ }
+
+ /*
+ * shortp is set when a short read of the header shows that the
+ * record body lives in the in-memory buffer.
+ */
+ shortp = NULL;
+
+ /* Acquire a file descriptor. */
+ if (!F_ISSET(&dblp->c_fh, DB_FH_VALID)) {
+ if ((ret = __log_name(dblp, nlsn.file,
+ &np, &dblp->c_fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) {
+ /* The file name is reported by err1 and freed by err2. */
+ fail = np;
+ goto err1;
+ }
+ __os_freestr(np);
+ np = NULL;
+ }
+
+ /*
+ * See if we've already read this: the header must lie inside the
+ * window [r_off, r_off + r_size) of file r_file held in readbufp.
+ */
+ if (nlsn.file == dblp->r_file && nlsn.offset > dblp->r_off
+ && nlsn.offset + sizeof(HDR) < dblp->r_off + dblp->r_size)
+ goto got_header;
+
+ /*
+ * Seek to the header offset and read the header. Because the file
+ * may be pre-allocated, we have to make sure that we're not reading
+ * past the information in the start of the in-memory buffer.
+ */
+
+ readp = &hdr;
+ offset = nlsn.offset;
+ if (nlsn.file == lp->lsn.file && offset + sizeof(HDR) > lp->w_off)
+ nr = lp->w_off - offset;
+ else if (dblp->readbufp == NULL)
+ nr = sizeof(HDR);
+ else {
+ /*
+ * Read a whole buffer's worth into readbufp instead of just
+ * the header.
+ * NOTE(review): r_file and r_off are updated before the
+ * read, but r_size only after a read of at least a full
+ * header; on a failed or short read the window description
+ * looks stale -- confirm this cannot satisfy a later
+ * "already read" check.
+ */
+ nr = lp->buffer_size;
+ readp = dblp->readbufp;
+ dblp->r_file = nlsn.file;
+ /* Going backwards. Put the current in the middle. */
+ if (flags == DB_PREV || flags == DB_LAST) {
+ if (offset <= lp->buffer_size/2)
+ offset = 0;
+ else
+ offset = offset - lp->buffer_size/2;
+ }
+ if (nlsn.file == lp->lsn.file && offset + nr > lp->lsn.offset)
+ nr = lp->lsn.offset - offset;
+ dblp->r_off = offset;
+ }
+
+ if ((ret = __os_seek(dblp->dbenv,
+ &dblp->c_fh, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) {
+ fail = "seek";
+ goto err1;
+ }
+ if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, readp, nr, &nr)) != 0) {
+ fail = "read";
+ goto err1;
+ }
+ if (nr < sizeof(HDR)) {
+ /* If read returns EOF, try the next file. */
+ if (nr == 0) {
+ if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
+ goto corrupt;
+ goto next_file;
+ }
+
+ /* The partial header went to readbufp; move it into hdr. */
+ if (dblp->readbufp != NULL)
+ memcpy((u_int8_t *) &hdr, readp, nr);
+
+ /*
+ * If read returns a short count the rest of the record has
+ * to be in the in-memory buffer.
+ */
+ if (lp->b_off < sizeof(HDR) - nr)
+ goto corrupt;
+
+ /* Get the rest of the header from the in-memory buffer. */
+ memcpy((u_int8_t *)&hdr + nr, dblp->bufp, sizeof(HDR) - nr);
+
+ if (hdr.len == 0)
+ goto next_file;
+
+ /* The record body follows the header tail in the buffer. */
+ shortp = dblp->bufp + (sizeof(HDR) - nr);
+ }
+
+ else if (dblp->readbufp != NULL) {
+ dblp->r_size = nr;
+ /* Pull the header out of the read-ahead window. */
+got_header: memcpy((u_int8_t *)&hdr,
+ dblp->readbufp + (nlsn.offset - dblp->r_off), sizeof(HDR));
+ }
+
+ /*
+ * Check for buffers of 0's, that's what we usually see during recovery,
+ * although it's certainly not something on which we can depend. Check
+ * for impossibly large records. The malloc should fail later, but we
+ * have customers that run mallocs that handle allocation failure as a
+ * fatal error.
+ */
+ if (hdr.len == 0)
+ goto next_file;
+ if (hdr.len <= sizeof(HDR) || hdr.len > lp->persist.lg_max)
+ goto corrupt;
+ len = hdr.len - sizeof(HDR);
+
+ /* If we've already moved to the in-memory buffer, fill from there. */
+ if (shortp != NULL) {
+ if (lp->b_off < ((u_int8_t *)shortp - dblp->bufp) + len)
+ goto corrupt;
+ if ((ret = __db_retcopy(NULL, dbt, shortp, len,
+ &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0)
+ goto err2;
+ goto cksum;
+ }
+
+ if (dblp->readbufp != NULL) {
+ /*
+ * The whole record is inside the read-ahead window: copy it
+ * from there. Otherwise position the file just past the
+ * header so the record can be read directly.
+ */
+ if (nlsn.offset + hdr.len < dblp->r_off + dblp->r_size) {
+ if ((ret = __db_retcopy(NULL, dbt, dblp->readbufp +
+ (nlsn.offset - dblp->r_off) + sizeof(HDR),
+ len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0)
+ goto err2;
+ goto cksum;
+ } else if ((ret = __os_seek(dblp->dbenv, &dblp->c_fh, 0,
+ 0, nlsn.offset + sizeof(HDR), 0, DB_OS_SEEK_SET)) != 0) {
+ fail = "seek";
+ goto err1;
+ }
+ }
+
+ /*
+ * Allocate temporary memory to hold the record.
+ *
+ * XXX
+ * We're calling malloc(3) with a region locked. This isn't
+ * a good idea.
+ */
+ if ((ret = __os_malloc(dbenv, len, NULL, &tbuf)) != 0)
+ goto err1;
+
+ /*
+ * Read the record into the buffer. If read returns a short count,
+ * there was an error or the rest of the record is in the in-memory
+ * buffer. Note, the information may be garbage if we're in recovery,
+ * so don't read past the end of the buffer's memory.
+ *
+ * Because the file may be pre-allocated, we have to make sure that
+ * we're not reading past the information in the start of the in-memory
+ * buffer.
+ */
+ if (nlsn.file == lp->lsn.file &&
+ nlsn.offset + sizeof(HDR) + len > lp->w_off)
+ nr = lp->w_off - (nlsn.offset + sizeof(HDR));
+ else
+ nr = len;
+ if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, tbuf, nr, &nr)) != 0) {
+ fail = "read";
+ goto err1;
+ }
+ /* The missing part must fit in, and be present in, the log buffer. */
+ if (len - nr > lp->buffer_size)
+ goto corrupt;
+ if (nr != len) {
+ if (lp->b_off < len - nr)
+ goto corrupt;
+
+ /* Get the rest of the record from the in-memory buffer. */
+ memcpy((u_int8_t *)tbuf + nr, dblp->bufp, len - nr);
+ }
+
+ /* Copy the record into the user's DBT. */
+ if ((ret = __db_retcopy(NULL, dbt, tbuf, len,
+ &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0)
+ goto err2;
+ __os_free(tbuf, 0);
+ tbuf = NULL;
+
+cksum: /*
+ * If the user specified a partial record read, the checksum can't
+ * match. It's not an obvious thing to do, but a user testing for
+ * the length of a record might do it.
+ */
+ if (!F_ISSET(dbt, DB_DBT_PARTIAL) &&
+ hdr.cksum != __ham_func4(NULL, dbt->data, dbt->size)) {
+ if (!silent)
+ __db_err(dbenv, "log_get: checksum mismatch");
+ goto corrupt;
+ }
+
+ /*
+ * Update the cursor and the return lsn. c_off (the header's prev
+ * field) is what DB_PREV uses as the next offset; c_len is what
+ * DB_NEXT adds to the current offset.
+ */
+ dblp->c_off = hdr.prev;
+ dblp->c_len = hdr.len;
+ dblp->c_lsn = nlsn;
+ *alsn = nlsn;
+
+ return (0);
+
+corrupt:/*
+ * This is the catchall -- for some reason we didn't find enough
+ * information or it wasn't reasonable information, and it wasn't
+ * because a system call failed.
+ */
+ ret = EIO;
+ fail = "read";
+
+ /* err1: report the failure (unless silent), then clean up. */
+err1: if (!silent) {
+ if (fail == NULL)
+ __db_err(dbenv, "log_get: %s", db_strerror(ret));
+ else
+ __db_err(dbenv,
+ "log_get: %s: %s", fail, db_strerror(ret));
+ }
+
+ /* err2: release whatever is still allocated and return ret. */
+err2: if (np != NULL)
+ __os_freestr(np);
+ if (tbuf != NULL)
+ __os_free(tbuf, 0);
+ return (ret);
+}
diff --git a/bdb/log/log_method.c b/bdb/log/log_method.c
new file mode 100644
index 00000000000..883f485d891
--- /dev/null
+++ b/bdb/log/log_method.c
@@ -0,0 +1,121 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_method.c,v 11.14 2000/11/30 00:58:40 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "log.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static int __log_set_lg_max __P((DB_ENV *, u_int32_t));
+static int __log_set_lg_bsize __P((DB_ENV *, u_int32_t));
+static int __log_set_lg_dir __P((DB_ENV *, const char *));
+
+/*
+ * __log_dbenv_create --
+ *	Install the logging defaults and the log configuration methods in a
+ *	DB_ENV structure.
+ *
+ * PUBLIC: void __log_dbenv_create __P((DB_ENV *));
+ */
+void
+__log_dbenv_create(dbenv)
+	DB_ENV *dbenv;
+{
+	/* Default buffer and log file sizes. */
+	dbenv->lg_bsize = LG_BSIZE_DEFAULT;
+	dbenv->lg_max = LG_MAX_DEFAULT;
+
+#ifdef HAVE_RPC
+	/* A client environment gets the client versions of the methods. */
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) {
+		dbenv->set_lg_bsize = __dbcl_set_lg_bsize;
+		dbenv->set_lg_max = __dbcl_set_lg_max;
+		dbenv->set_lg_dir = __dbcl_set_lg_dir;
+		return;
+	}
+#endif
+
+	dbenv->set_lg_bsize = __log_set_lg_bsize;
+	dbenv->set_lg_max = __log_set_lg_max;
+	dbenv->set_lg_dir = __log_set_lg_dir;
+}
+
+/*
+ * __log_set_lg_bsize --
+ *	Set the log buffer size.  The size may be at most a quarter of the
+ *	currently configured log file size; anything larger is reported
+ *	through __db_err and rejected with EINVAL.
+ */
+static int
+__log_set_lg_bsize(dbenv, lg_bsize)
+	DB_ENV *dbenv;
+	u_int32_t lg_bsize;
+{
+	ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lg_bsize");
+
+	/* Let's not be silly. */
+	if (lg_bsize <= dbenv->lg_max / 4) {
+		dbenv->lg_bsize = lg_bsize;
+		return (0);
+	}
+
+	__db_err(dbenv, "log buffer size must be <= log file size / 4");
+	return (EINVAL);
+}
+
+/*
+ * __log_set_lg_max --
+ *	Set the maximum log file size.  The size must be at least four times
+ *	the currently configured log buffer size; anything smaller is
+ *	reported through __db_err and rejected with EINVAL.
+ */
+static int
+__log_set_lg_max(dbenv, lg_max)
+	DB_ENV *dbenv;
+	u_int32_t lg_max;
+{
+	ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lg_max");
+
+	/*
+	 * Let's not be silly.
+	 *
+	 * Compare by dividing rather than multiplying: for unsigned integers
+	 * "lg_max / 4 < lg_bsize" is the same test as "lg_max < lg_bsize * 4",
+	 * but it cannot wrap around in 32 bits when lg_bsize is very large.
+	 * It also mirrors the check made when the buffer size is set.
+	 */
+	if (lg_max / 4 < dbenv->lg_bsize) {
+		__db_err(dbenv, "log file size must be >= log buffer size * 4");
+		return (EINVAL);
+	}
+
+	dbenv->lg_max = lg_max;
+	return (0);
+}
+
+/*
+ * __log_set_lg_dir --
+ *	Set the log file directory.
+ *
+ *	dir is duplicated with __os_strdup; the environment keeps the copy.
+ *	The copy is made before the previous value is released, so that a
+ *	failed allocation leaves the old directory in place (rather than a
+ *	pointer to freed memory), and so that passing the currently
+ *	configured string as dir is safe.
+ *
+ *	Returns 0 on success, or the error from __os_strdup.
+ */
+static int
+__log_set_lg_dir(dbenv, dir)
+	DB_ENV *dbenv;
+	const char *dir;
+{
+	char *newdir;
+	int ret;
+
+	if ((ret = __os_strdup(dbenv, dir, &newdir)) != 0)
+		return (ret);
+
+	if (dbenv->db_log_dir != NULL)
+		__os_freestr(dbenv->db_log_dir);
+	dbenv->db_log_dir = newdir;
+	return (0);
+}
diff --git a/bdb/log/log_put.c b/bdb/log/log_put.c
new file mode 100644
index 00000000000..e5cdedb5493
--- /dev/null
+++ b/bdb/log/log_put.c
@@ -0,0 +1,701 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_put.c,v 11.26 2000/11/30 00:58:40 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "log.h"
+#include "hash.h"
+#include "clib_ext.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
+static int __log_flush __P((DB_LOG *, const DB_LSN *));
+static int __log_newfh __P((DB_LOG *));
+static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
+static int __log_open_files __P((DB_ENV *));
+static int __log_write __P((DB_LOG *, void *, u_int32_t));
+
+/*
+ * log_put --
+ *	Public interface for appending a record to the log.
+ *
+ *	With HAVE_RPC and a client environment the call is handed to
+ *	__dbcl_log_put.  Otherwise the environment is checked (panic state,
+ *	logging configured), the flags are validated (0, DB_CHECKPOINT,
+ *	DB_CURLSN or DB_FLUSH), and __log_put is run while holding the log
+ *	region lock.  Returns 0 on success or an error value.
+ */
+int
+log_put(dbenv, lsn, dbt, flags)
+	DB_ENV *dbenv;
+	DB_LSN *lsn;
+	const DBT *dbt;
+	u_int32_t flags;
+{
+	DB_LOG *logh;
+	int ret;
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_put(dbenv, lsn, dbt, flags));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	/* Only a single, known flag value (or none at all) is accepted. */
+	if (!(flags == 0 || flags == DB_CHECKPOINT ||
+	    flags == DB_CURLSN || flags == DB_FLUSH))
+		return (__db_ferr(dbenv, "log_put", 0));
+
+	logh = dbenv->lg_handle;
+
+	/* The real work happens under the region lock. */
+	R_LOCK(dbenv, &logh->reginfo);
+	ret = __log_put(dbenv, lsn, dbt, flags);
+	R_UNLOCK(dbenv, &logh->reginfo);
+
+	return (ret);
+}
+
+/*
+ * __log_put --
+ * Write a log record; internal version.
+ *
+ * dbenv: environment whose lg_handle identifies the log.
+ * lsn: out; receives the LSN at which the caller's record was written
+ * (or the current end-of-log LSN when flags is DB_CURLSN).
+ * dbt: the record to write; only its data and size are used here.
+ * flags: 0, DB_CURLSN, DB_FLUSH or DB_CHECKPOINT (validated by log_put).
+ *
+ * Returns 0 on success, EINVAL if the record cannot fit in a log file,
+ * or the error returned by a flush/write helper.
+ *
+ * log_put calls this with the log region locked; presumably other
+ * callers must do the same -- TODO confirm against those callers.
+ *
+ * PUBLIC: int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
+ */
+int
+__log_put(dbenv, lsn, dbt, flags)
+ DB_ENV *dbenv;
+ DB_LSN *lsn;
+ const DBT *dbt;
+ u_int32_t flags;
+{
+ DBT t;
+ DB_LOG *dblp;
+ LOG *lp;
+ u_int32_t lastoff;
+ int ret;
+
+ dblp = dbenv->lg_handle;
+ lp = dblp->reginfo.primary;
+
+ /*
+ * If the application just wants to know where we are, fill in
+ * the information. Currently used by the transaction manager
+ * to avoid writing TXN_begin records.
+ */
+ if (flags == DB_CURLSN) {
+ lsn->file = lp->lsn.file;
+ lsn->offset = lp->lsn.offset;
+ return (0);
+ }
+
+ /* If this information won't fit in the file, swap files. */
+ if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
+ /*
+ * A new file starts with the LOGP persist record (written
+ * below when lsn.offset is 0), so the caller's record must fit
+ * alongside it.
+ * NOTE(review): __log_putr writes a HDR for every record,
+ * including the persist record; this sum counts only one HDR --
+ * confirm whether that is intentional.
+ */
+ if (sizeof(HDR) +
+ sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
+ __db_err(dbenv,
+ "log_put: record larger than maximum file size");
+ return (EINVAL);
+ }
+
+ /* Flush the log. */
+ if ((ret = __log_flush(dblp, NULL)) != 0)
+ return (ret);
+
+ /*
+ * Save the last known offset from the previous file, we'll
+ * need it to initialize the persistent header information.
+ */
+ lastoff = lp->lsn.offset;
+
+ /* Point the current LSN to the new file. */
+ ++lp->lsn.file;
+ lp->lsn.offset = 0;
+
+ /* Reset the file write offset. */
+ lp->w_off = 0;
+ } else
+ lastoff = 0;
+
+ /* Initialize the LSN information returned to the user. */
+ lsn->file = lp->lsn.file;
+ lsn->offset = lp->lsn.offset;
+
+ /*
+ * Insert persistent information as the first record in every file.
+ * Note that the previous length is wrong for the very first record
+ * of the log, but that's okay, we check for it during retrieval.
+ */
+ if (lp->lsn.offset == 0) {
+ /* Only data and size are set; __log_putr reads nothing else. */
+ t.data = &lp->persist;
+ t.size = sizeof(LOGP);
+ /*
+ * lp->len is the length of the last record written (maintained
+ * by __log_putr), so lastoff - lp->len is the offset at which
+ * that record started in the previous file.
+ */
+ if ((ret = __log_putr(dblp, lsn,
+ &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
+ return (ret);
+
+ /*
+ * Record files open in this log.
+ * If we are recovering then we are in the
+ * process of outputting the files, don't do
+ * it again.
+ */
+ if (!F_ISSET(dblp, DBLOG_RECOVER) &&
+ (ret = __log_open_files(dbenv)) != 0)
+ return (ret);
+
+ /* Update the LSN information returned to the user. */
+ lsn->file = lp->lsn.file;
+ lsn->offset = lp->lsn.offset;
+ }
+
+ /* Write the application's log record. */
+ if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
+ return (ret);
+
+ /*
+ * On a checkpoint, we:
+ * Put out the checkpoint record (above).
+ * Save the LSN of the checkpoint in the shared region.
+ * Append the set of file name information into the log.
+ */
+ if (flags == DB_CHECKPOINT) {
+ lp->chkpt_lsn = *lsn;
+ if ((ret = __log_open_files(dbenv)) != 0)
+ return (ret);
+ }
+
+ /*
+ * On a checkpoint or when flush is requested, we:
+ * Flush the current buffer contents to disk.
+ * Sync the log to disk.
+ */
+ if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
+ if ((ret = __log_flush(dblp, NULL)) != 0)
+ return (ret);
+
+ /*
+ * On a checkpoint, we:
+ * Save the time the checkpoint was written.
+ * Reset the bytes written since the last checkpoint.
+ */
+ if (flags == DB_CHECKPOINT) {
+ (void)time(&lp->chkpt);
+ lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
+ }
+ return (0);
+}
+
+/*
+ * __log_putr --
+ *	Append one record (header followed by payload) to the log.
+ *
+ *	prev is stored in the record header as the offset of the previous
+ *	record.  After a file switch the current offset is 0, which is why
+ *	the caller supplies the value instead of it being derived here.
+ *	The header checksum covers the payload only.
+ *
+ *	As each part is written, the region's current offset is advanced
+ *	and its "length of last record" field is rebuilt.  Returns 0 or the
+ *	error from __log_fill.
+ */
+static int
+__log_putr(dblp, lsn, dbt, prev)
+	DB_LOG *dblp;
+	DB_LSN *lsn;
+	const DBT *dbt;
+	u_int32_t prev;
+{
+	HDR hdr;
+	LOG *region;
+	int ret;
+
+	region = dblp->reginfo.primary;
+
+	/* Build the record header. */
+	hdr.cksum = __ham_func4(NULL, dbt->data, dbt->size);
+	hdr.len = sizeof(HDR) + dbt->size;
+	hdr.prev = prev;
+
+	/* Header first ... */
+	ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR));
+	if (ret != 0)
+		return (ret);
+	region->len = sizeof(HDR);
+	region->lsn.offset += sizeof(HDR);
+
+	/* ... then the caller's data. */
+	ret = __log_fill(dblp, lsn, dbt->data, dbt->size);
+	if (ret != 0)
+		return (ret);
+	region->len += dbt->size;
+	region->lsn.offset += dbt->size;
+
+	return (0);
+}
+
+/*
+ * log_flush --
+ *	Public interface: write all records less than or equal to the
+ *	specified LSN.
+ *
+ *	With HAVE_RPC and a client environment the call is handed to
+ *	__dbcl_log_flush.  Otherwise the environment is checked (panic
+ *	state, logging configured) and __log_flush is run while holding
+ *	the log region lock.  Returns whatever __log_flush returns.
+ */
+int
+log_flush(dbenv, lsn)
+	DB_ENV *dbenv;
+	const DB_LSN *lsn;
+{
+	DB_LOG *logh;
+	int ret;
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_flush(dbenv, lsn));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	logh = dbenv->lg_handle;
+
+	/* Flush under the region lock. */
+	R_LOCK(dbenv, &logh->reginfo);
+	ret = __log_flush(logh, lsn);
+	R_UNLOCK(dbenv, &logh->reginfo);
+
+	return (ret);
+}
+
+/*
+ * __log_flush --
+ * Write all records less than or equal to the specified LSN; internal
+ * version.
+ *
+ * dblp: per-process log handle.
+ * lsn: flush target, or NULL to flush up to the last record written.
+ *
+ * Returns 0 on success (including when nothing needed to be done),
+ * EINVAL if lsn lies past the last record in the log, the error from
+ * writing or opening the log file, or the result of __db_panic if the
+ * fsync fails.
+ *
+ * log_flush calls this with the log region locked.
+ */
+static int
+__log_flush(dblp, lsn)
+ DB_LOG *dblp;
+ const DB_LSN *lsn;
+{
+ DB_LSN t_lsn;
+ LOG *lp;
+ int current, ret;
+
+ ret = 0;
+ lp = dblp->reginfo.primary;
+
+ /*
+ * If no LSN specified, flush the entire log by setting the flush LSN
+ * to the last LSN written in the log. Otherwise, check that the LSN
+ * isn't a non-existent record for the log.
+ */
+ if (lsn == NULL) {
+ /* lp->lsn is the next write position; back up one record. */
+ t_lsn.file = lp->lsn.file;
+ t_lsn.offset = lp->lsn.offset - lp->len;
+ lsn = &t_lsn;
+ } else
+ if (lsn->file > lp->lsn.file ||
+ (lsn->file == lp->lsn.file &&
+ lsn->offset > lp->lsn.offset - lp->len)) {
+ __db_err(dblp->dbenv,
+ "log_flush: LSN past current end-of-log");
+ return (EINVAL);
+ }
+
+ /*
+ * If the LSN is less than or equal to the last-sync'd LSN, we're done.
+ * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte
+ * after the byte we absolutely know was written to disk, so the test
+ * is <, not <=.
+ */
+ if (lsn->file < lp->s_lsn.file ||
+ (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
+ return (0);
+
+ /*
+ * We may need to write the current buffer. We have to write the
+ * current buffer if the flush LSN is greater than or equal to the
+ * buffer's starting LSN.
+ */
+ /* current records whether this call wrote the in-memory buffer. */
+ current = 0;
+ if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) {
+ if ((ret = __log_write(dblp, dblp->bufp, lp->b_off)) != 0)
+ return (ret);
+
+ lp->b_off = 0;
+ current = 1;
+ }
+
+ /*
+ * It's possible that this thread may never have written to this log
+ * file. Acquire a file descriptor if we don't already have one.
+ * One last check -- if we're not writing anything from the current
+ * buffer, don't bother. We have nothing to write and nothing to
+ * sync.
+ */
+ if (dblp->lfname != lp->lsn.file) {
+ if (!current)
+ return (0);
+ if ((ret = __log_newfh(dblp)) != 0)
+ return (ret);
+ }
+
+ /* Sync all writes to disk. */
+ if ((ret = __os_fsync(dblp->dbenv, &dblp->lfh)) != 0)
+ return (__db_panic(dblp->dbenv, ret));
+ ++lp->stat.st_scount;
+
+ /* Set the last-synced LSN, using the on-disk write offset. */
+ lp->s_lsn.file = lp->f_lsn.file;
+ lp->s_lsn.offset = lp->w_off;
+
+ return (0);
+}
+
+/*
+ * __log_fill --
+ * Write information into the log.
+ *
+ * Copies len bytes starting at addr into the in-memory log buffer,
+ * writing the buffer to the log file through __log_write each time it
+ * fills. Data that spans one or more whole buffers while the buffer is
+ * empty is written straight from addr without being copied.
+ *
+ * dblp: per-process log handle (owns the buffer, bufp).
+ * lsn: LSN of the record being written; used only to stamp f_lsn when a
+ * buffer is started.
+ * addr, len: the bytes to append.
+ *
+ * Returns 0 on success or the error from __log_write. The caller is
+ * responsible for advancing lp->lsn.offset (see __log_putr).
+ */
+static int
+__log_fill(dblp, lsn, addr, len)
+ DB_LOG *dblp;
+ DB_LSN *lsn;
+ void *addr;
+ u_int32_t len;
+{
+ LOG *lp;
+ u_int32_t bsize, nrec;
+ size_t nw, remain;
+ int ret;
+
+ lp = dblp->reginfo.primary;
+ bsize = lp->buffer_size;
+
+ while (len > 0) { /* Copy out the data. */
+ /*
+ * If we're beginning a new buffer, note the user LSN to which
+ * the first byte of the buffer belongs. We have to know this
+ * when flushing the buffer so that we know if the in-memory
+ * buffer needs to be flushed.
+ */
+ if (lp->b_off == 0)
+ lp->f_lsn = *lsn;
+
+ /*
+ * If we're on a buffer boundary and the data is big enough,
+ * copy as many records as we can directly from the data.
+ */
+ if (lp->b_off == 0 && len >= bsize) {
+ /* nrec counts whole buffer-sized chunks, not log records. */
+ nrec = len / bsize;
+ if ((ret = __log_write(dblp, addr, nrec * bsize)) != 0)
+ return (ret);
+ addr = (u_int8_t *)addr + nrec * bsize;
+ len -= nrec * bsize;
+ ++lp->stat.st_wcount_fill;
+ continue;
+ }
+
+ /* Figure out how many bytes we can copy this time. */
+ remain = bsize - lp->b_off;
+ nw = remain > len ? len : remain;
+ memcpy(dblp->bufp + lp->b_off, addr, nw);
+ addr = (u_int8_t *)addr + nw;
+ len -= nw;
+ lp->b_off += nw;
+
+ /* If we fill the buffer, flush it. */
+ if (lp->b_off == bsize) {
+ if ((ret = __log_write(dblp, dblp->bufp, bsize)) != 0)
+ return (ret);
+ lp->b_off = 0;
+ ++lp->stat.st_wcount_fill;
+ }
+ }
+ return (0);
+}
+
+/*
+ * __log_write --
+ *	Write len bytes from addr to the current log file at the region's
+ *	write offset.
+ *
+ *	A file handle for the current log file is acquired first if this
+ *	handle has none or has one for a different file.  A failed seek or
+ *	write panics the environment; a short write is reported and fails
+ *	with EIO.  On success the write offset and the write statistics
+ *	are advanced and 0 is returned.
+ */
+static int
+__log_write(dblp, addr, len)
+	DB_LOG *dblp;
+	void *addr;
+	u_int32_t len;
+{
+	DB_ENV *dbenv;
+	LOG *lp;
+	size_t nw;
+	int ret;
+
+	dbenv = dblp->dbenv;
+	lp = dblp->reginfo.primary;
+
+	/* Make sure our handle refers to the log file being written. */
+	if (!F_ISSET(&dblp->lfh, DB_FH_VALID) ||
+	    dblp->lfname != lp->lsn.file) {
+		ret = __log_newfh(dblp);
+		if (ret != 0)
+			return (ret);
+	}
+
+	/*
+	 * Always reposition before writing: someone else may have written
+	 * the file since we last did.
+	 */
+	ret = __os_seek(dbenv,
+	    &dblp->lfh, 0, 0, lp->w_off, 0, DB_OS_SEEK_SET);
+	if (ret == 0)
+		ret = __os_write(dbenv, &dblp->lfh, addr, len, &nw);
+	if (ret != 0)
+		return (__db_panic(dbenv, ret));
+
+	if (nw != len) {
+		__db_err(dbenv, "Short write while writing log");
+		return (EIO);
+	}
+
+	/* Advance the file write offset past what was just written. */
+	lp->w_off += len;
+
+	/* Total bytes written, kept as megabytes plus a remainder. */
+	lp->stat.st_w_bytes += len;
+	if (lp->stat.st_w_bytes >= MEGABYTE) {
+		lp->stat.st_w_bytes -= MEGABYTE;
+		++lp->stat.st_w_mbytes;
+	}
+
+	/* Same accounting for bytes written since the last checkpoint. */
+	lp->stat.st_wc_bytes += len;
+	if (lp->stat.st_wc_bytes >= MEGABYTE) {
+		lp->stat.st_wc_bytes -= MEGABYTE;
+		++lp->stat.st_wc_mbytes;
+	}
+
+	++lp->stat.st_wcount;
+
+	return (0);
+}
+
+/*
+ * log_file --
+ *	Map a DB_LSN to a file name.
+ *
+ *	dbenv: environment with logging configured.
+ *	lsn:   LSN whose file number selects the log file.
+ *	namep: caller's buffer that receives the NUL-terminated path.
+ *	len:   size of namep in bytes.
+ *
+ *	Returns 0 on success, EINVAL if namep is too small for the name
+ *	(namep is then set to the empty string when it has room for it),
+ *	or the error from building the name.
+ */
+int
+log_file(dbenv, lsn, namep, len)
+	DB_ENV *dbenv;
+	const DB_LSN *lsn;
+	char *namep;
+	size_t len;
+{
+	DB_LOG *dblp;
+	int ret;
+	char *name;
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_file(dbenv, lsn, namep, len));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	/* Build the name only (no file handle is passed, so no open). */
+	dblp = dbenv->lg_handle;
+	R_LOCK(dbenv, &dblp->reginfo);
+	ret = __log_name(dblp, lsn->file, &name, NULL, 0);
+	R_UNLOCK(dbenv, &dblp->reginfo);
+	if (ret != 0)
+		return (ret);
+
+	/* Check to make sure there's enough room and copy the name. */
+	if (len < strlen(name) + 1) {
+		/* Don't store even the terminator into a 0-byte buffer. */
+		if (len > 0)
+			*namep = '\0';
+		__db_err(dbenv, "log_file: name buffer is too short");
+		ret = EINVAL;
+	} else
+		(void)strcpy(namep, name);
+
+	/* The allocated name is released on success and failure alike. */
+	__os_freestr(name);
+
+	return (ret);
+}
+
+/*
+ * __log_newfh --
+ *	Acquire a file handle for the current log file.
+ *
+ *	Any handle already held in dblp->lfh is closed, dblp->lfname is set
+ *	to the region's current log file number, and that file is opened
+ *	(created if necessary) into dblp->lfh.  Returns 0 on success or the
+ *	error from __log_name, which is also reported through __db_err.
+ */
+static int
+__log_newfh(dblp)
+	DB_LOG *dblp;
+{
+	LOG *lp;
+	int ret;
+	char *name;
+
+	/*
+	 * __log_name returns before storing a name if the path cannot be
+	 * built; start from NULL so the error path below never prints or
+	 * frees an indeterminate pointer.
+	 */
+	name = NULL;
+
+	/* Close any previous file descriptor. */
+	if (F_ISSET(&dblp->lfh, DB_FH_VALID))
+		(void)__os_closehandle(&dblp->lfh);
+
+	/* Get the path of the new file and open it. */
+	lp = dblp->reginfo.primary;
+	dblp->lfname = lp->lsn.file;
+
+	/*
+	 * Adding DB_OSO_LOG to the flags may add additional platform-specific
+	 * optimizations.  On WinNT, the logfile is preallocated, which may
+	 * have a time penalty at startup, but have better overall throughput.
+	 * We are not certain that this works reliably, so enable at your own
+	 * risk.
+	 *
+	 * XXX:
+	 * Initialize the log file size.  This is a hack to push the log's
+	 * maximum size down into the Windows __os_open routine, because it
+	 * wants to pre-allocate it.
+	 */
+	dblp->lfh.log_size = dblp->dbenv->lg_max;
+	if ((ret = __log_name(dblp, dblp->lfname,
+	    &name, &dblp->lfh,
+	    DB_OSO_CREATE |/* DB_OSO_LOG |*/ DB_OSO_SEQ)) != 0)
+		__db_err(dblp->dbenv, "log_put: %s: %s",
+		    name == NULL ? "(unknown)" : name, db_strerror(ret));
+
+	if (name != NULL)
+		__os_freestr(name);
+	return (ret);
+}
+
+/*
+ * __log_name --
+ *	Return the log name for a particular file, and optionally open it.
+ *
+ *	dblp:       per-process log handle.
+ *	filenumber: number of the log file.
+ *	namep:      out; receives an allocated path that the caller frees.
+ *	fhp:        if NULL only the name is built; otherwise the file is
+ *	            opened into this handle.
+ *	flags:      open flags; DB_OSO_RDONLY additionally enables the
+ *	            fall-back to the old-style file name.
+ *
+ *	Returns 0 on success.  If the new-style open fails and
+ *	DB_OSO_RDONLY is not set, the failure is reported and the
+ *	environment is panicked.  On every failure after the new-style
+ *	name has been built, *namep still holds that name.
+ *
+ * PUBLIC: int __log_name __P((DB_LOG *,
+ * PUBLIC:     u_int32_t, char **, DB_FH *, u_int32_t));
+ */
+int
+__log_name(dblp, filenumber, namep, fhp, flags)
+	DB_LOG *dblp;
+	u_int32_t filenumber, flags;
+	char **namep;
+	DB_FH *fhp;
+{
+	LOG *lp;
+	int ret;
+	char *oname;
+	char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20];
+
+	lp = dblp->reginfo.primary;
+
+	/*
+	 * The shared error exit frees oname; it must not be indeterminate
+	 * if we get there before an old-style name was allocated.
+	 */
+	oname = NULL;
+
+	/*
+	 * !!!
+	 * The semantics of this routine are bizarre.
+	 *
+	 * The reason for all of this is that we need a place where we can
+	 * intercept requests for log files, and, if appropriate, check for
+	 * both the old-style and new-style log file names.  The trick is
+	 * that all callers of this routine that are opening the log file
+	 * read-only want to use an old-style file name if they can't find
+	 * a match using a new-style name.  The only down-side is that some
+	 * callers may check for the old-style when they really don't need
+	 * to, but that shouldn't mess up anything, and we only check for
+	 * the old-style name when we've already failed to find a new-style
+	 * one.
+	 *
+	 * Create a new-style file name, and if we're not going to open the
+	 * file, return regardless.
+	 */
+	(void)snprintf(new, sizeof(new), LFNAME, filenumber);
+	if ((ret = __db_appname(dblp->dbenv,
+	    DB_APP_LOG, NULL, new, 0, NULL, namep)) != 0 || fhp == NULL)
+		return (ret);
+
+	/* Open the new-style file -- if we succeed, we're done. */
+	if ((ret = __os_open(dblp->dbenv,
+	    *namep, flags, lp->persist.mode, fhp)) == 0)
+		return (0);
+
+	/*
+	 * The open failed... if the DB_OSO_RDONLY flag isn't set, we're
+	 * done, the caller isn't interested in old-style files.
+	 */
+	if (!LF_ISSET(DB_OSO_RDONLY)) {
+		__db_err(dblp->dbenv,
+		    "%s: log file open failed: %s", *namep, db_strerror(ret));
+		return (__db_panic(dblp->dbenv, ret));
+	}
+
+	/* Create an old-style file name. */
+	(void)snprintf(old, sizeof(old), LFNAME_V1, filenumber);
+	if ((ret = __db_appname(dblp->dbenv,
+	    DB_APP_LOG, NULL, old, 0, NULL, &oname)) != 0)
+		goto err;
+
+	/*
+	 * Open the old-style file -- if we succeed, we're done.  Free the
+	 * space allocated for the new-style name and return the old-style
+	 * name to the caller.
+	 */
+	if ((ret = __os_open(dblp->dbenv,
+	    oname, flags, lp->persist.mode, fhp)) == 0) {
+		__os_freestr(*namep);
+		*namep = oname;
+		return (0);
+	}
+
+	/*
+	 * Couldn't find either style of name -- return the new-style name
+	 * for the caller's error message.  If it's an old-style name that's
+	 * actually missing we're going to confuse the user with the error
+	 * message, but that implies that not only were we looking for an
+	 * old-style name, but we expected it to exist and we weren't just
+	 * looking for any log file.  That's not a likely error.
+	 */
+err:	if (oname != NULL)
+		__os_freestr(oname);
+	return (ret);
+}
+
+/*
+ * __log_open_files --
+ *	Write a log_register record for every in-use entry on the region's
+ *	file-name queue.
+ *
+ *	Each record carries the entry's name (omitted when the entry has
+ *	none), unique file id, log id, type and meta page number.  The
+ *	opcode is LOG_CLOSE while the log handle is flagged DBLOG_RECOVER
+ *	and LOG_CHECKPOINT otherwise.  Returns 0, or the first error from
+ *	__log_register_log.
+ */
+static int
+__log_open_files(dbenv)
+	DB_ENV *dbenv;
+{
+	DB_LOG *dblp;
+	DB_LSN r_unused;
+	DBT fid_dbt, name_dbt;
+	FNAME *fnp;
+	LOG *lp;
+	int has_name, ret;
+
+	dblp = dbenv->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	fnp = SH_TAILQ_FIRST(&lp->fq, __fname);
+	for (; fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+		/* Skip entries that are not in use. */
+		if (fnp->ref == 0)
+			continue;
+
+		/* The name is optional; describe it only when present. */
+		has_name = fnp->name_off != INVALID_ROFF;
+		if (has_name) {
+			memset(&name_dbt, 0, sizeof(name_dbt));
+			name_dbt.data = R_ADDR(&dblp->reginfo, fnp->name_off);
+			name_dbt.size = strlen(name_dbt.data) + 1;
+		}
+
+		memset(&fid_dbt, 0, sizeof(fid_dbt));
+		fid_dbt.data = fnp->ufid;
+		fid_dbt.size = DB_FILE_ID_LEN;
+
+		/*
+		 * Normally these are LOG_CHECKPOINT records, which are
+		 * processed during the OPENFILES pass of recovery.  At the
+		 * end of recovery we instead write LOG_CLOSE records for
+		 * the files that were open, so that a future recovery run
+		 * has the correct files open during its backward pass and
+		 * closes them on its forward pass.
+		 */
+		ret = __log_register_log(dbenv,
+		    NULL, &r_unused, 0,
+		    F_ISSET(dblp, DBLOG_RECOVER) ? LOG_CLOSE : LOG_CHECKPOINT,
+		    has_name ? &name_dbt : NULL,
+		    &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno);
+		if (ret != 0)
+			return (ret);
+	}
+
+	return (0);
+}
diff --git a/bdb/log/log_rec.c b/bdb/log/log_rec.c
new file mode 100644
index 00000000000..a871848295e
--- /dev/null
+++ b/bdb/log/log_rec.c
@@ -0,0 +1,621 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_rec.c,v 11.48 2001/01/11 18:19:53 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_am.h"
+#include "log.h"
+
+static int __log_do_open __P((DB_ENV *, DB_LOG *,
+ u_int8_t *, char *, DBTYPE, int32_t, db_pgno_t));
+static int __log_open_file __P((DB_ENV *, DB_LOG *, __log_register_args *));
+
+/*
+ * __log_register_recover --
+ *	Recovery function for log_register records.
+ *
+ *	Depending on the record's opcode and the recovery operation, the
+ *	file named in the record is opened (redo of an open, undo of a
+ *	close, or a checkpoint record seen while rolling backward or in the
+ *	open-files pass) or closed.  ENOENT and EINVAL from the open are not
+ *	treated as errors; the file is removed from the transaction list
+ *	instead.  Returns 0 or an error value.
+ *
+ * PUBLIC: int __log_register_recover
+ * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__log_register_recover(dbenv, dbtp, lsnp, op, info)
+	DB_ENV *dbenv;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	DB_ENTRY *dbe;
+	DB_LOG *logp;
+	DB *dbp;
+	__log_register_args *argp;
+	int do_rem, ret, t_ret;
+
+	logp = dbenv->lg_handle;
+	dbp = NULL;
+
+	/*
+	 * The exit path tests argp before freeing it, so it must have a
+	 * known value even when reading the record fails.
+	 */
+	argp = NULL;
+
+#ifdef DEBUG_RECOVER
+	REC_PRINT(__log_register_print);
+#endif
+	COMPQUIET(lsnp, NULL);
+
+	if ((ret = __log_register_read(dbenv, dbtp->data, &argp)) != 0)
+		goto out;
+
+	if ((argp->opcode == LOG_OPEN &&
+	    (DB_REDO(op) || op == DB_TXN_OPENFILES)) ||
+	    (argp->opcode == LOG_CLOSE && DB_UNDO(op))) {
+		/*
+		 * If we are redoing an open or undoing a close, then we need
+		 * to open a file.  We must open the file even if
+		 * the meta page is not yet written as we may be creating it.
+		 */
+		if (op == DB_TXN_OPENFILES)
+			F_SET(logp, DBLOG_FORCE_OPEN);
+		ret = __log_open_file(dbenv, logp, argp);
+		F_CLR(logp, DBLOG_FORCE_OPEN);
+		if (ret == ENOENT || ret == EINVAL) {
+			if (op == DB_TXN_OPENFILES && argp->name.size != 0 &&
+			    (ret = __db_txnlist_delete(dbenv, info,
+				argp->name.data, argp->fileid, 0)) != 0)
+				goto out;
+			ret = 0;
+		}
+	} else if (argp->opcode != LOG_CHECKPOINT) {
+		/*
+		 * If we are undoing an open, then we need to close the file.
+		 *
+		 * If the file is deleted, then we can just ignore this close.
+		 * Otherwise, we should usually have a valid dbp we should
+		 * close or whose reference count should be decremented.
+		 * However, if we shut down without closing a file, we may, in
+		 * fact, not have the file open, and that's OK.
+		 */
+		do_rem = 0;
+		MUTEX_THREAD_LOCK(dbenv, logp->mutexp);
+		if (argp->fileid < logp->dbentry_cnt) {
+			dbe = &logp->dbentry[argp->fileid];
+
+			DB_ASSERT(dbe->refcount == 1);
+
+			ret = __db_txnlist_close(info,
+			    argp->fileid, dbe->count);
+			if ((dbp = TAILQ_FIRST(&dbe->dblist)) != NULL)
+				(void)log_unregister(dbenv, dbp);
+			do_rem = 1;
+		}
+		MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp);
+		if (do_rem) {
+			(void)__log_rem_logid(logp, dbp, argp->fileid);
+			/*
+			 * If remove or rename has closed the file, don't
+			 * sync.
+			 */
+			if (dbp != NULL &&
+			    (t_ret = dbp->close(dbp,
+			    dbp->mpf == NULL ? DB_NOSYNC : 0)) != 0 && ret == 0)
+				ret = t_ret;
+		}
+	} else if (DB_UNDO(op) || op == DB_TXN_OPENFILES) {
+		/*
+		 * It's a checkpoint and we are rolling backward.  It
+		 * is possible that the system was shut down and thus
+		 * ended with a stable checkpoint; this file was never
+		 * closed and has therefore not been reopened yet.  If
+		 * so, we need to try to open it.
+		 */
+		ret = __log_open_file(dbenv, logp, argp);
+		if (ret == ENOENT || ret == EINVAL) {
+			if (argp->name.size != 0 && (ret =
+			    __db_txnlist_delete(dbenv, info,
+				argp->name.data, argp->fileid, 0)) != 0)
+				goto out;
+			ret = 0;
+		}
+	}
+
+out:	if (argp != NULL)
+		__os_free(argp, 0);
+	return (ret);
+}
+
+/*
+ * __log_open_file --
+ * Called during log_register recovery. Make sure that we have an
+ * entry in the dbentry table for this ndx. Returns 0 on success,
+ * non-zero on error.
+ *
+ * dbenv: environment.
+ * lp: per-process log handle (a DB_LOG, despite the name).
+ * argp: decoded log_register record; fileid is the table index.
+ *
+ * Returns ENOENT without opening anything when the record has no name.
+ * Returns 0 when the table already holds a handle matching the record's
+ * meta page number and file id. Otherwise returns the result of
+ * __log_do_open.
+ */
+static int
+__log_open_file(dbenv, lp, argp)
+ DB_ENV *dbenv;
+ DB_LOG *lp;
+ __log_register_args *argp;
+{
+ DB_ENTRY *dbe;
+ DB *dbp;
+
+ /*
+ * We never re-open temporary files. Temp files are only
+ * useful during aborts in which case the dbp was entered
+ * when the file was registered. During recovery, we treat
+ * temp files as properly deleted files, allowing the open to
+ * fail and not reporting any errors when recovery fails to
+ * get a valid dbp from db_fileid_to_db.
+ */
+ if (argp->name.size == 0) {
+ /* A NULL dbp makes __log_add_logid mark the slot deleted. */
+ (void)__log_add_logid(dbenv, lp, NULL, argp->fileid);
+ return (ENOENT);
+ }
+
+ /*
+ * Because of reference counting, we cannot automatically close files
+ * during recovery, so when we're opening, we have to check that the
+ * name we are opening is what we expect. If it's not, then we close
+ * the old file and open the new one.
+ */
+ MUTEX_THREAD_LOCK(dbenv, lp->mutexp);
+ if (argp->fileid < lp->dbentry_cnt)
+ dbe = &lp->dbentry[argp->fileid];
+ else
+ dbe = NULL;
+
+ if (dbe != NULL) {
+ dbe->deleted = 0;
+ if ((dbp = TAILQ_FIRST(&dbe->dblist)) != NULL) {
+ /* A different file occupies the slot: replace it. */
+ if (dbp->meta_pgno != argp->meta_pgno ||
+ memcmp(dbp->fileid,
+ argp->uid.data, DB_FILE_ID_LEN) != 0) {
+ MUTEX_THREAD_UNLOCK(dbenv, lp->mutexp);
+ goto reopen;
+ }
+ /* Same file: outside recovery just take a reference. */
+ if (!F_ISSET(lp, DBLOG_RECOVER))
+ dbe->refcount++;
+ MUTEX_THREAD_UNLOCK(dbenv, lp->mutexp);
+ return (0);
+ }
+ }
+
+ MUTEX_THREAD_UNLOCK(dbenv, lp->mutexp);
+ /*
+ * The body of this if is reached only through the goto above: it
+ * tears down the stale handle, then falls through to the open.
+ */
+ if (0) {
+reopen: (void)log_unregister(dbp->dbenv, dbp);
+ (void)__log_rem_logid(lp, dbp, argp->fileid);
+ dbp->close(dbp, 0);
+ }
+
+ return (__log_do_open(dbenv, lp,
+ argp->uid.data, argp->name.data,
+ argp->ftype, argp->fileid, argp->meta_pgno));
+}
+
+/*
+ * __log_reopen_file -- close and reopen a db file.
+ *	Must be called when a metadata page changes.
+ *
+ *	dbenv:     environment.
+ *	name:      file name, or NULL to look the name up in the log
+ *	           region by fileid.
+ *	ndx:       index of the file in the log's DB entry table.
+ *	fileid:    unique file id (DB_FILE_ID_LEN bytes).
+ *	meta_pgno: meta page number to open with.
+ *
+ *	Returns 0 on success, EINVAL if name is NULL and no named in-use
+ *	region entry matches fileid, or the error from looking up or
+ *	reopening the handle.
+ *
+ * PUBLIC: int __log_reopen_file __P((DB_ENV *,
+ * PUBLIC:     char *, int32_t, u_int8_t *, db_pgno_t));
+ *
+ */
+int
+__log_reopen_file(dbenv, name, ndx, fileid, meta_pgno)
+	DB_ENV *dbenv;
+	char *name;
+	int32_t ndx;
+	u_int8_t *fileid;
+	db_pgno_t meta_pgno;
+{
+	DB *dbp;
+	DB_LOG *logp;
+	DBTYPE ftype;
+	FNAME *fnp;
+	LOG *lp;
+	char *tmp_name;
+	int ret;
+
+	logp = dbenv->lg_handle;
+	tmp_name = NULL;
+
+	if (name == NULL) {
+		R_LOCK(dbenv, &logp->reginfo);
+
+		lp = logp->reginfo.primary;
+
+		for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname);
+		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+			if (fnp->ref == 0)	/* Entry not in use. */
+				continue;
+			if (memcmp(fnp->ufid, fileid, DB_FILE_ID_LEN) == 0)
+				break;
+		}
+
+		if (fnp == NULL || fnp->name_off == INVALID_ROFF) {
+			/* Don't return with the region still locked. */
+			R_UNLOCK(dbenv, &logp->reginfo);
+			__db_err(dbenv,
+			    "metasub recover: non-existent file id");
+			return (EINVAL);
+		}
+
+		/*
+		 * The name lives in the shared region; take a private copy
+		 * so it can be used after the region lock is dropped.
+		 */
+		name = R_ADDR(&logp->reginfo, fnp->name_off);
+		ret = __os_strdup(dbenv, name, &tmp_name);
+		R_UNLOCK(dbenv, &logp->reginfo);
+		if (ret != 0)
+			return (ret);
+		name = tmp_name;
+	}
+
+	if ((ret = __db_fileid_to_db(dbenv, &dbp, ndx, 0)) != 0)
+		goto out;
+	ftype = dbp->type;
+	(void)log_unregister(dbenv, dbp);
+	(void)__log_rem_logid(logp, dbp, ndx);
+	(void)dbp->close(dbp, 0);
+
+	ret = __log_do_open(dbenv, logp, fileid, name, ftype, ndx, meta_pgno);
+
+	/* Release the private copy of the name on every exit path. */
+out:	if (tmp_name != NULL)
+		__os_free(tmp_name, 0);
+	return (ret);
+}
+
+/*
+ * __log_do_open --
+ * Open files referenced in the log. This is the part of the open that
+ * is not protected by the thread mutex.
+ *
+ * dbenv: environment.
+ * lp: per-process log handle (a DB_LOG, despite the name).
+ * uid: unique file id recorded in the log (DB_FILE_ID_LEN bytes).
+ * name: file name to open.
+ * ftype: access method type recorded in the log.
+ * ndx: index of the file in the log's DB entry table.
+ * meta_pgno: meta page number to open with.
+ *
+ * Returns 0 on success, or the db_create error. Every other failure
+ * (the open itself, or a file id that does not match uid) closes the
+ * handle, records the slot with a NULL handle, and returns ENOENT; the
+ * original open error is not propagated.
+ */
+static int
+__log_do_open(dbenv, lp, uid, name, ftype, ndx, meta_pgno)
+ DB_ENV *dbenv;
+ DB_LOG *lp;
+ u_int8_t *uid;
+ char *name;
+ DBTYPE ftype;
+ int32_t ndx;
+ db_pgno_t meta_pgno;
+{
+ DB *dbp;
+ int ret;
+ u_int8_t zeroid[DB_FILE_ID_LEN];
+
+ if ((ret = db_create(&dbp, lp->dbenv, 0)) != 0)
+ return (ret);
+
+ dbp->log_fileid = ndx;
+
+ /*
+ * This is needed to signal to the locking routines called while
+ * opening databases that we are potentially undoing a transaction
+ * from an XA process. Since the XA process does not share
+ * locks with the aborting transaction this prevents us from
+ * deadlocking during the open during rollback.
+ * Because this routine is called either during recovery or during an
+ * XA_ABORT, we can safely set DB_AM_RECOVER in the dbp since it
+ * will not be shared with other threads.
+ */
+ F_SET(dbp, DB_AM_RECOVER);
+ /* For anything but the base meta page, preset the logged file id. */
+ if (meta_pgno != PGNO_BASE_MD)
+ memcpy(dbp->fileid, uid, DB_FILE_ID_LEN);
+ dbp->type = ftype;
+ if ((ret =
+ __db_dbopen(dbp, name, 0, __db_omode("rw----"), meta_pgno)) == 0) {
+ /*
+ * Verify that we are opening the same file that we were
+ * referring to when we wrote this log record.
+ */
+ if (memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) {
+ /*
+ * An all-zero id in the handle is accepted and replaced by
+ * the logged id; any other mismatch is a different file.
+ */
+ memset(zeroid, 0, DB_FILE_ID_LEN);
+ if (memcmp(dbp->fileid, zeroid, DB_FILE_ID_LEN) != 0)
+ goto not_right;
+ memcpy(dbp->fileid, uid, DB_FILE_ID_LEN);
+ }
+ /* The handle is entered in the log tables only in recovery. */
+ if (IS_RECOVERING(dbenv)) {
+ (void)log_register(dbp->dbenv, dbp, name);
+ (void)__log_add_logid(dbenv, lp, dbp, ndx);
+ }
+ return (0);
+ }
+
+ /* Reached by falling through on a failed open as well as by goto. */
+not_right:
+ (void)dbp->close(dbp, 0);
+ (void)__log_add_logid(dbenv, lp, NULL, ndx);
+
+ return (ENOENT);
+}
+
+/*
+ * __log_add_logid --
+ *	Adds a DB entry to the log's DB entry table.
+ *
+ *	dbenv: environment.
+ *	logp:  per-process log handle owning the table.
+ *	dbp:   handle to enter, or NULL to mark the slot as deleted.
+ *	ndx:   0-based slot index; the table is grown to hold it.
+ *
+ *	Returns 0 on success or the error from growing the table.
+ *
+ * PUBLIC: int __log_add_logid __P((DB_ENV *, DB_LOG *, DB *, int32_t));
+ */
+int
+__log_add_logid(dbenv, logp, dbp, ndx)
+	DB_ENV *dbenv;
+	DB_LOG *logp;
+	DB *dbp;
+	int32_t ndx;
+{
+	DB *dbtmp;
+	int32_t i;
+	int ret;
+
+	ret = 0;
+
+	MUTEX_THREAD_LOCK(dbenv, logp->mutexp);
+
+	/*
+	 * Check if we need to grow the table.  Note, ndx is 0-based (the
+	 * index into the DB entry table) and dbentry_cnt is 1-based, the
+	 * number of available slots.
+	 */
+	if (logp->dbentry_cnt <= ndx) {
+		if ((ret = __os_realloc(dbenv,
+		    (ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY),
+		    NULL, &logp->dbentry)) != 0)
+			goto err;
+
+		/*
+		 * We have moved the head of the queue.
+		 * Fix up the queue header of an empty queue or the previous
+		 * pointer of the first element.
+		 *
+		 * The element to re-link is the one already at the head of
+		 * the moved list (dbtmp), not the handle being added: dbp
+		 * may be NULL and belongs to slot ndx, not slot i.
+		 */
+		for (i = 0; i < logp->dbentry_cnt; i++) {
+			if ((dbtmp =
+			    TAILQ_FIRST(&logp->dbentry[i].dblist)) == NULL)
+				TAILQ_INIT(&logp->dbentry[i].dblist);
+			else
+				TAILQ_REINSERT_HEAD(
+				    &logp->dbentry[i].dblist, dbtmp, links);
+		}
+
+		/* Initialize the new entries. */
+		for (i = logp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) {
+			logp->dbentry[i].count = 0;
+			TAILQ_INIT(&logp->dbentry[i].dblist);
+			logp->dbentry[i].deleted = 0;
+			logp->dbentry[i].refcount = 0;
+		}
+
+		logp->dbentry_cnt = i;
+	}
+
+	if (logp->dbentry[ndx].deleted == 0 &&
+	    TAILQ_FIRST(&logp->dbentry[ndx].dblist) == NULL) {
+		/* Empty, live slot: this is its first reference. */
+		logp->dbentry[ndx].count = 0;
+		if (dbp != NULL)
+			TAILQ_INSERT_HEAD(&logp->dbentry[ndx].dblist,
+			    dbp, links);
+		logp->dbentry[ndx].deleted = dbp == NULL;
+		logp->dbentry[ndx].refcount = 1;
+	} else if (!F_ISSET(logp, DBLOG_RECOVER)) {
+		/* Slot already in use: outside recovery, add a reference. */
+		if (dbp != NULL)
+			TAILQ_INSERT_HEAD(&logp->dbentry[ndx].dblist,
+			    dbp, links);
+		logp->dbentry[ndx].refcount++;
+	}
+
+err:	MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp);
+	return (ret);
+}
+
+/*
+ * __db_fileid_to_db --
+ * Return the DB corresponding to the specified fileid.
+ *
+ * dbenv: environment.
+ * dbpp: out; receives the DB handle on success.
+ * ndx: index of the file in the log's DB entry table.
+ * inc: if non-zero, bump the slot's count when the file is deleted.
+ *
+ * Returns 0 on success, DB_DELETED if the slot is marked deleted,
+ * ENOENT if no handle is available, EINVAL if the file id is unknown to
+ * the log region, or the error from __log_do_open.
+ *
+ * PUBLIC: int __db_fileid_to_db __P((DB_ENV *, DB **, int32_t, int));
+ */
+int
+__db_fileid_to_db(dbenv, dbpp, ndx, inc)
+ DB_ENV *dbenv;
+ DB **dbpp;
+ int32_t ndx;
+ int inc;
+{
+ DB_LOG *logp;
+ DB *dbp;
+ FNAME *fname;
+ int ret;
+ char *name;
+
+ ret = 0;
+ logp = dbenv->lg_handle;
+
+ MUTEX_THREAD_LOCK(dbenv, logp->mutexp);
+
+ /*
+ * Under XA, a process different than the one issuing DB operations
+ * may abort a transaction. In this case, recovery routines are run
+ * by a process that does not necessarily have the file open, so
+ * we must open the file explicitly.
+ */
+ /* dbp is assigned here only to test for an empty list. */
+ if (ndx >= logp->dbentry_cnt ||
+ (!logp->dbentry[ndx].deleted &&
+ (dbp = TAILQ_FIRST(&logp->dbentry[ndx].dblist)) == NULL)) {
+ /* During recovery a missing handle is simply reported. */
+ if (F_ISSET(logp, DBLOG_RECOVER)) {
+ ret = ENOENT;
+ goto err;
+ }
+ if (__log_lid_to_fname(logp, ndx, &fname) != 0) {
+ /* Couldn't find entry; this is a fatal error. */
+ __db_err(dbenv, "Missing log fileid entry");
+ ret = EINVAL;
+ goto err;
+ }
+ name = R_ADDR(&logp->reginfo, fname->name_off);
+
+ /*
+ * __log_do_open is called without protection of the
+ * log thread lock.
+ */
+ MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp);
+
+ /*
+ * At this point, we are not holding the thread lock, so exit
+ * directly instead of going through the exit code at the
+ * bottom. If the __log_do_open succeeded, then we don't need
+ * to do any of the remaining error checking at the end of this
+ * routine.
+ */
+ if ((ret = __log_do_open(dbenv, logp,
+ fname->ufid, name, fname->s_type,
+ ndx, fname->meta_pgno)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): this reads the table without the thread lock,
+ * and __log_do_open enters the handle in the table only when
+ * IS_RECOVERING(dbenv) is true -- confirm that slot ndx is
+ * always populated on this path.
+ */
+ *dbpp = TAILQ_FIRST(&logp->dbentry[ndx].dblist);
+ return (0);
+ }
+
+ /*
+ * Return DB_DELETED if the file has been deleted (it's not an error).
+ */
+ if (logp->dbentry[ndx].deleted) {
+ ret = DB_DELETED;
+ if (inc)
+ logp->dbentry[ndx].count++;
+ goto err;
+ }
+
+ /*
+ * Otherwise return 0, but if we don't have a corresponding DB, it's
+ * an error.
+ */
+ if ((*dbpp = TAILQ_FIRST(&logp->dbentry[ndx].dblist)) == NULL)
+ ret = ENOENT;
+
+err: MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp);
+ return (ret);
+}
+
+/*
+ * __log_close_files --
+ *	Close files that were opened by the recovery daemon.
+ *
+ *	Every handle on every slot of the log's DB entry table is
+ *	unregistered, unlinked and closed, and each slot's deleted flag and
+ *	reference count are cleared.  A file is synced as it is closed
+ *	unless its mpf pointer has been NULLed by a db_remove or db_rename.
+ *	We may not have flushed the log_register record that closes the
+ *	file.
+ *
+ * PUBLIC: void __log_close_files __P((DB_ENV *));
+ */
+void
+__log_close_files(dbenv)
+	DB_ENV *dbenv;
+{
+	DB_ENTRY *slot;
+	DB_LOG *logp;
+	DB *dbp;
+	int32_t ndx;
+
+	logp = dbenv->lg_handle;
+
+	MUTEX_THREAD_LOCK(dbenv, logp->mutexp);
+	for (ndx = 0; ndx < logp->dbentry_cnt; ++ndx) {
+		slot = &logp->dbentry[ndx];
+
+		/* Drain the slot's list from the head. */
+		for (;;) {
+			dbp = TAILQ_FIRST(&slot->dblist);
+			if (dbp == NULL)
+				break;
+
+			(void)log_unregister(dbenv, dbp);
+			TAILQ_REMOVE(&slot->dblist, dbp, links);
+			(void)dbp->close(dbp,
+			    dbp->mpf == NULL ? DB_NOSYNC : 0);
+		}
+
+		slot->refcount = 0;
+		slot->deleted = 0;
+	}
+	MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp);
+}
+
+/*
+ * __log_rem_logid
+ *	Drop one reference to a slot of the log's DB entry table.
+ *
+ *	When the last reference goes away the slot's list is reset and its
+ *	deleted flag cleared.  Otherwise, if a DB handle was supplied, it is
+ *	unlinked from the slot's list if present; with a NULL handle the
+ *	call is only a reference count decrement.
+ *
+ * PUBLIC: void __log_rem_logid __P((DB_LOG *, DB *, int32_t));
+ */
+void
+__log_rem_logid(logp, dbp, ndx)
+	DB_LOG *logp;
+	DB *dbp;
+	int32_t ndx;
+{
+	DB_ENTRY *slot;
+	DB *cur;
+
+	MUTEX_THREAD_LOCK(logp->dbenv, logp->mutexp);
+
+	slot = &logp->dbentry[ndx];
+	slot->refcount--;
+
+	if (slot->refcount == 0) {
+		/* Nobody is left: return the slot to its empty state. */
+		TAILQ_INIT(&slot->dblist);
+		slot->deleted = 0;
+	} else if (dbp != NULL) {
+		/* Look for this handle and unlink it if it is listed. */
+		cur = TAILQ_FIRST(&slot->dblist);
+		while (cur != NULL && cur != dbp)
+			cur = TAILQ_NEXT(cur, links);
+		if (cur != NULL)
+			TAILQ_REMOVE(&slot->dblist, cur, links);
+	}
+
+	MUTEX_THREAD_UNLOCK(logp->dbenv, logp->mutexp);
+}
+
+/*
+ * __log_lid_to_fname --
+ *	Search the file-name queue in the shared log region for the in-use
+ *	entry whose log file id equals lid.  On a match *fnamep is set to
+ *	the entry and 0 is returned; otherwise -1 is returned and *fnamep
+ *	is left untouched.
+ * PUBLIC: int __log_lid_to_fname __P((DB_LOG *, int32_t, FNAME **));
+ */
+int
+__log_lid_to_fname(dblp, lid, fnamep)
+	DB_LOG *dblp;
+	int32_t lid;
+	FNAME **fnamep;
+{
+	FNAME *cur;
+	LOG *region;
+
+	region = dblp->reginfo.primary;
+
+	cur = SH_TAILQ_FIRST(&region->fq, __fname);
+	while (cur != NULL) {
+		/* Only entries that are in use can match. */
+		if (cur->ref != 0 && cur->id == lid) {
+			*fnamep = cur;
+			return (0);
+		}
+		cur = SH_TAILQ_NEXT(cur, q, __fname);
+	}
+
+	return (-1);
+}
diff --git a/bdb/log/log_register.c b/bdb/log/log_register.c
new file mode 100644
index 00000000000..1e0e523d8b9
--- /dev/null
+++ b/bdb/log/log_register.c
@@ -0,0 +1,433 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: log_register.c,v 11.35 2001/01/10 16:04:19 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "log.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+/*
+ * log_register --
+ *	Register a file name.
+ *
+ *	dbenv	environment; must have been configured with DB_INIT_LOG.
+ *	dbp	database handle being registered; its type, fileid and
+ *		meta_pgno identify the entry in the shared FNAME queue.
+ *	name	file name to record, or NULL (logging is allowed on
+ *		in-memory databases, which have no name).
+ *
+ *	Returns 0 on success.  Returns EINVAL if the DB type is unknown or
+ *	the file is currently locked, otherwise whatever error the shared
+ *	allocator, __log_register_log or __log_add_logid reported.  On every
+ *	path that takes the region lock, the lock is released before
+ *	returning.
+ */
+int
+log_register(dbenv, dbp, name)
+	DB_ENV *dbenv;
+	DB *dbp;
+	const char *name;
+{
+	DBT fid_dbt, r_name;
+	DB_LOG *dblp;
+	DB_LSN r_unused;
+	FNAME *found_fnp, *fnp, *recover_fnp, *reuse_fnp;
+	LOG *lp;
+	size_t len;
+	int32_t maxid;
+	int created, inserted, ok, ret;
+	void *namep;
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_register(dbenv, dbp, name));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	dblp = dbenv->lg_handle;
+	lp = dblp->reginfo.primary;
+	fnp = reuse_fnp = NULL;
+	created = inserted = ret = 0;
+	namep = NULL;
+
+	/* Check the arguments. */
+	if (dbp->type != DB_BTREE && dbp->type != DB_QUEUE &&
+	    dbp->type != DB_HASH && dbp->type != DB_RECNO) {
+		__db_err(dbenv, "log_register: unknown DB file type");
+		return (EINVAL);
+	}
+
+	R_LOCK(dbenv, &dblp->reginfo);
+
+	/*
+	 * See if we've already got this file in the log, finding the
+	 * (maximum+1) in-use file id and some available file id (if we
+	 * find an available fid, we'll use it, else we'll have to allocate
+	 * one after the maximum that we found).
+	 */
+	ok = 0;
+	found_fnp = recover_fnp = NULL;
+	for (maxid = 0, fnp = SH_TAILQ_FIRST(&lp->fq, __fname);
+	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+		if (F_ISSET(dblp, DBLOG_RECOVER) && fnp->id == dbp->log_fileid)
+			recover_fnp = fnp;
+		if (fnp->ref == 0) {		/* Entry is not in use. */
+			if (reuse_fnp == NULL)
+				reuse_fnp = fnp;
+			continue;
+		}
+		if (memcmp(dbp->fileid, fnp->ufid, DB_FILE_ID_LEN) == 0) {
+			if (fnp->meta_pgno == 0) {
+				if (fnp->locked == 1) {
+					__db_err(dbenv, "File is locked");
+					/*
+					 * Leave through the common exit so
+					 * the region lock is released, and
+					 * so that a reference already taken
+					 * on an earlier matching entry
+					 * (found_fnp) is given back.
+					 */
+					ret = EINVAL;
+					fnp = found_fnp;
+					goto err;
+				}
+				if (found_fnp != NULL) {
+					fnp = found_fnp;
+					goto found;
+				}
+				ok = 1;
+			}
+			if (dbp->meta_pgno == fnp->meta_pgno) {
+				if (F_ISSET(dblp, DBLOG_RECOVER)) {
+					if (fnp->id != dbp->log_fileid) {
+						/*
+						 * If we are in recovery, there
+						 * is only one dbp on the list.
+						 * If the refcount goes to 0,
+						 * we will clear the list.  If
+						 * it doesn't, we want to leave
+						 * the dbp where it is, so
+						 * passing a NULL to rem_logid
+						 * is correct.
+						 */
+						__log_rem_logid(dblp,
+						    NULL, fnp->id);
+						if (recover_fnp != NULL)
+							break;
+						continue;
+					}
+					fnp->ref = 1;
+					goto found;
+				}
+				++fnp->ref;
+				if (ok)
+					goto found;
+				found_fnp = fnp;
+			}
+		}
+		if (maxid <= fnp->id)
+			maxid = fnp->id + 1;
+	}
+	if ((fnp = found_fnp) != NULL)
+		goto found;
+
+	/* Fill in fnp structure. */
+	if (recover_fnp != NULL) {	/* This has the right number */
+		fnp = recover_fnp;
+		inserted = 1;		/* Already linked on lp->fq. */
+	} else if (reuse_fnp != NULL) {	/* Reuse existing one. */
+		fnp = reuse_fnp;
+		inserted = 1;		/* Already linked on lp->fq. */
+	} else {			/* Allocate a new one. */
+		if ((ret = __db_shalloc(dblp->reginfo.addr,
+		    sizeof(FNAME), 0, &fnp)) != 0) {
+			fnp = NULL;
+			goto mem_err;
+		}
+		fnp->id = maxid;
+	}
+
+	/*
+	 * From here on fnp is an entry this call is (re)building, so the
+	 * error path may tear it down completely.
+	 */
+	created = 1;
+
+	if (F_ISSET(dblp, DBLOG_RECOVER))
+		fnp->id = dbp->log_fileid;
+
+	fnp->ref = 1;
+	fnp->locked = 0;
+	fnp->s_type = dbp->type;
+	memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN);
+	fnp->meta_pgno = dbp->meta_pgno;
+
+	if (name != NULL) {
+		len = strlen(name) + 1;
+		if ((ret =
+		    __db_shalloc(dblp->reginfo.addr, len, 0, &namep)) != 0) {
+			/* name may be NULL when the FNAME allocation failed. */
+mem_err:		__db_err(dbenv,
+			    "Unable to allocate memory to register %s",
+			    name == NULL ? "in-memory database" : name);
+			goto err;
+		}
+		fnp->name_off = R_OFFSET(&dblp->reginfo, namep);
+		memcpy(namep, name, len);
+	} else
+		fnp->name_off = INVALID_ROFF;
+
+	/* Only do the insert if we allocated a new fnp. */
+	if (!inserted) {
+		SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname);
+		inserted = 1;
+	}
+
+	/* Log the registry. */
+	if (!F_ISSET(dblp, DBLOG_RECOVER)) {
+		/*
+		 * We allow logging on in-memory databases, so the name here
+		 * could be NULL.
+		 */
+		memset(&r_name, 0, sizeof(r_name));
+		if (name != NULL) {
+			r_name.data = (void *)name;
+			r_name.size = strlen(name) + 1;
+		}
+		memset(&fid_dbt, 0, sizeof(fid_dbt));
+		fid_dbt.data = dbp->fileid;
+		fid_dbt.size = DB_FILE_ID_LEN;
+		if ((ret = __log_register_log(dbenv, NULL, &r_unused,
+		    0, LOG_OPEN, name == NULL ? NULL : &r_name,
+		    &fid_dbt, fnp->id, dbp->type, dbp->meta_pgno)) != 0)
+			goto err;
+	}
+
+found:	/*
+	 * If we found the entry in the shared area, then the file is
+	 * already open, so there is no need to log the open.  We only
+	 * log the open and closes on the first open and last close.
+	 */
+	if (!F_ISSET(dblp, DBLOG_RECOVER)) {
+		if ((ret = __log_add_logid(dbenv, dblp, dbp, fnp->id)) != 0)
+			goto err;
+		dbp->log_fileid = fnp->id;
+	}
+
+	if (0) {
+err:		if (fnp != NULL && created) {
+			/*
+			 * The entry was being built by this call: unlink it
+			 * if it is on the queue, then release its storage.
+			 */
+			if (inserted)
+				SH_TAILQ_REMOVE(&lp->fq, fnp, q, __fname);
+			if (namep != NULL)
+				__db_shalloc_free(dblp->reginfo.addr, namep);
+			__db_shalloc_free(dblp->reginfo.addr, fnp);
+		} else if (fnp != NULL)
+			/*
+			 * The entry already existed and is shared with other
+			 * users, so it must not be freed.  Every path that
+			 * arrives here with such an entry has taken exactly
+			 * one reference on it; give that reference back.
+			 */
+			--fnp->ref;
+	}
+
+	R_UNLOCK(dbenv, &dblp->reginfo);
+
+	return (ret);
+}
+
+/*
+ * log_unregister --
+ *	Discard a registered file name: the handle's current log file id is
+ *	passed to __log_filelist_update as a close, and the handle's id is
+ *	then reset to DB_LOGFILEID_INVALID.
+ */
+int
+log_unregister(dbenv, dbp)
+	DB_ENV *dbenv;
+	DB *dbp;
+{
+	int status;
+
+#ifdef HAVE_RPC
+	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+		return (__dbcl_log_unregister(dbenv, dbp));
+#endif
+
+	PANIC_CHECK(dbenv);
+	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);
+
+	/* A NULL new name asks the file-list code to mark the file closed. */
+	status =
+	    __log_filelist_update(dbenv, dbp, dbp->log_fileid, NULL, NULL);
+
+	/* The id is invalidated whether or not the update succeeded. */
+	dbp->log_fileid = DB_LOGFILEID_INVALID;
+
+	return (status);
+}
+
+/*
+ * PUBLIC: int __log_filelist_update
+ * PUBLIC: __P((DB_ENV *, DB *, int32_t, const char *, int *));
+ *
+ * Utility player for updating and logging the file list.  Called
+ * for 3 reasons:
+ *	1) mark file closed: newname == NULL.
+ *	2) change filename: newname != NULL.
+ *	3) from recovery to verify & change filename if necessary, set != NULL.
+ *
+ *	dbenv	environment owning the log region.
+ *	dbp	handle to drop from the process-local table on close.
+ *	fid	log file id of the FNAME entry to update.
+ *	newname	replacement name, or NULL to close.
+ *	set	if non-NULL (and newname != NULL), receives 1 when the stored
+ *		name differed from newname and was changed, 0 when it
+ *		already matched.
+ *
+ * Returns 0 on success, EINVAL if no entry carries fid, or the error
+ * reported by __log_register_log or the shared allocator.  The region lock
+ * is held for the duration of the call and released on every exit.
+ */
+int
+__log_filelist_update(dbenv, dbp, fid, newname, set)
+	DB_ENV *dbenv;
+	DB *dbp;
+	int32_t fid;
+	const char *newname;
+	int *set;
+{
+	DBT fid_dbt, r_name;
+	DB_LOG *dblp;
+	DB_LSN r_unused;
+	FNAME *fnp;
+	LOG *lp;
+	u_int32_t len, newlen;
+	int ret;
+	void *namep, *newp;
+
+	ret = 0;
+	dblp = dbenv->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	R_LOCK(dbenv, &dblp->reginfo);
+
+	/* Find the entry in the log. */
+	for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname);
+	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname))
+		if (fid == fnp->id)
+			break;
+	if (fnp == NULL) {
+		__db_err(dbenv, "log_unregister: non-existent file id");
+		ret = EINVAL;
+		goto ret1;
+	}
+
+	/*
+	 * Initialize both DBTs unconditionally: the rename path below uses
+	 * them even when the close record is not written.
+	 */
+	memset(&r_name, 0, sizeof(r_name));
+	memset(&fid_dbt, 0, sizeof(fid_dbt));
+	fid_dbt.data = fnp->ufid;
+	fid_dbt.size = DB_FILE_ID_LEN;
+
+	/*
+	 * Log the unregistry only if this is the last one and we are
+	 * really closing the file or if this is an abort of a created
+	 * file and we need to make sure there is a record in the log.
+	 */
+	namep = NULL;
+	len = 0;
+	if (fnp->name_off != INVALID_ROFF) {
+		namep = R_ADDR(&dblp->reginfo, fnp->name_off);
+		len = strlen(namep) + 1;
+	}
+	if (!F_ISSET(dblp, DBLOG_RECOVER) && fnp->ref == 1) {
+		if (namep != NULL) {
+			r_name.data = namep;
+			r_name.size = len;
+		}
+		if ((ret = __log_register_log(dbenv, NULL, &r_unused,
+		    0, LOG_CLOSE,
+		    fnp->name_off == INVALID_ROFF ? NULL : &r_name,
+		    &fid_dbt, fid, fnp->s_type, fnp->meta_pgno))
+		    != 0)
+			goto ret1;
+	}
+
+	/*
+	 * If we are changing the name we must log this fact.
+	 */
+	if (newname != NULL) {
+		DB_ASSERT(fnp->ref == 1);
+		newlen = strlen(newname) + 1;
+		if (!F_ISSET(dblp, DBLOG_RECOVER)) {
+			r_name.data = (void *) newname;
+			r_name.size = newlen;
+			if ((ret = __log_register_log(dbenv,
+			    NULL, &r_unused, 0, LOG_OPEN, &r_name, &fid_dbt,
+			    fnp->id, fnp->s_type, fnp->meta_pgno)) != 0)
+				goto ret1;
+		}
+
+		/*
+		 * Check to see if the name is already correct.  (namep is
+		 * only NULL when len is 0, in which case the length test
+		 * short-circuits the comparison.)
+		 */
+		if (set != NULL) {
+			if (len != newlen || memcmp(namep, newname, len) != 0)
+				*set = 1;
+			else {
+				*set = 0;
+				goto ret1;
+			}
+		}
+
+		/*
+		 * Change the name, realloc memory if necessary.  The new
+		 * buffer is obtained before the old one is released so that
+		 * an allocation failure leaves the entry's name intact, and
+		 * the old buffer is only freed if the entry actually had one.
+		 */
+		if (len < newlen) {
+			if ((ret = __db_shalloc(
+			    dblp->reginfo.addr, newlen, 0, &newp)) != 0) {
+				__db_err(dbenv,
+				    "Unable to allocate memory to register %s",
+				    newname);
+				goto ret1;
+			}
+			if (namep != NULL)
+				__db_shalloc_free(dblp->reginfo.addr, namep);
+			namep = newp;
+			fnp->name_off = R_OFFSET(&dblp->reginfo, namep);
+		}
+		memcpy(namep, newname, newlen);
+	} else {
+
+		/*
+		 * If more than 1 reference, just decrement the reference
+		 * and return.  Otherwise, free the name if one exists.
+		 */
+		DB_ASSERT(fnp->ref >= 1);
+		--fnp->ref;
+		if (fnp->ref == 0) {
+			if (fnp->name_off != INVALID_ROFF)
+				__db_shalloc_free(dblp->reginfo.addr,
+				    R_ADDR(&dblp->reginfo, fnp->name_off));
+			fnp->name_off = INVALID_ROFF;
+		}
+
+		/*
+		 * Remove from the process local table.  If this
+		 * operation is taking place during recovery, then
+		 * the logid was never added to the table, so do not remove it.
+		 */
+		if (!F_ISSET(dblp, DBLOG_RECOVER))
+			__log_rem_logid(dblp, dbp, fid);
+	}
+
+ret1:	R_UNLOCK(dbenv, &dblp->reginfo);
+	return (ret);
+}
+
+/*
+ * __log_file_lock -- lock a file for single access
+ *	This only works if logging is on.
+ *
+ *	Scans the in-use FNAME entries whose unique file id matches dbp's.
+ *	An entry with meta_pgno 0 and exactly one reference is marked locked;
+ *	any other matching entry means the file is open elsewhere, which is
+ *	reported and ends the scan with EINVAL.  Returns 0 otherwise.
+ *
+ * PUBLIC: int __log_file_lock __P((DB *));
+ */
+int
+__log_file_lock(dbp)
+	DB *dbp;
+{
+	DB_ENV *dbenv;
+	DB_LOG *dblp;
+	FNAME *cur;
+	LOG *region;
+	int ret;
+
+	dbenv = dbp->dbenv;
+	dblp = dbenv->lg_handle;
+	region = dblp->reginfo.primary;
+	ret = 0;
+
+	R_LOCK(dbenv, &dblp->reginfo);
+
+	for (cur = SH_TAILQ_FIRST(&region->fq, __fname);
+	    cur != NULL; cur = SH_TAILQ_NEXT(cur, q, __fname)) {
+		/* Skip free slots and entries for other files. */
+		if (cur->ref == 0)
+			continue;
+		if (memcmp(dbp->fileid, cur->ufid, DB_FILE_ID_LEN) != 0)
+			continue;
+
+		/* Sole reference to the meta_pgno 0 entry: take the lock. */
+		if (cur->meta_pgno == 0 && cur->ref == 1) {
+			cur->locked = 1;
+			continue;
+		}
+
+		/* Any other matching entry means someone has it open. */
+		__db_err(dbp->dbenv, "File is open");
+		ret = EINVAL;
+		break;
+	}
+
+	R_UNLOCK(dbenv, &dblp->reginfo);
+	return (ret);
+}