summaryrefslogtreecommitdiff
path: root/src/log
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-07-20 20:00:05 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-07-20 20:00:05 +0100
commit3ef782d3745ea8f25a3151561a3cfb882190210e (patch)
tree86b9c2f5fde051dd0bced99b3fc9f5a3ba08db69 /src/log
downloadberkeleydb-3ef782d3745ea8f25a3151561a3cfb882190210e.tar.gz
Tarball conversion
Diffstat (limited to 'src/log')
-rw-r--r--src/log/log.c1727
-rw-r--r--src/log/log_archive.c643
-rw-r--r--src/log/log_compare.c66
-rw-r--r--src/log/log_debug.c146
-rw-r--r--src/log/log_get.c1626
-rw-r--r--src/log/log_method.c533
-rw-r--r--src/log/log_print.c380
-rw-r--r--src/log/log_put.c2041
-rw-r--r--src/log/log_stat.c336
-rw-r--r--src/log/log_verify.c437
-rw-r--r--src/log/log_verify_auto.c318
-rw-r--r--src/log/log_verify_int.c4353
-rw-r--r--src/log/log_verify_stub.c79
-rw-r--r--src/log/log_verify_util.c2234
14 files changed, 14919 insertions, 0 deletions
diff --git a/src/log/log.c b/src/log/log.c
new file mode 100644
index 00000000..5808145f
--- /dev/null
+++ b/src/log/log.c
@@ -0,0 +1,1727 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/hmac.h"
+#include "dbinc/log.h"
+#include "dbinc/txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+
+static int __log_init __P((ENV *, DB_LOG *));
+static int __log_recover __P((DB_LOG *));
+
+/*
+ * __log_open --
+ * Internal version of log_open: only called from ENV->open.
+ *
+ * Allocates the per-process DB_LOG handle, joins (or creates) the log
+ * region, and stores the handle in env->lg_handle. If this call created
+ * the region it also initializes it, runs log recovery, and sets up the
+ * replication bulk-buffer fields.
+ *
+ * Returns 0 on success, otherwise the error code of the step that failed.
+ *
+ * PUBLIC: int __log_open __P((ENV *));
+ */
+int
+__log_open(env)
+ ENV *env;
+{
+ DB_ENV *dbenv;
+ DB_LOG *dblp;
+ LOG *lp;
+ u_int8_t *bulk;
+ int region_locked, ret;
+
+ dbenv = env->dbenv;
+ /* Tracks whether the err path must release the log system lock. */
+ region_locked = 0;
+
+ /* Create/initialize the DB_LOG structure. */
+ if ((ret = __os_calloc(env, 1, sizeof(DB_LOG), &dblp)) != 0)
+ return (ret);
+ dblp->env = env;
+
+ /* Join/create the log region. */
+ if ((ret = __env_region_share(env, &dblp->reginfo)) != 0)
+ goto err;
+
+ /* If we created the region, initialize it. */
+ if (F_ISSET(&dblp->reginfo, REGION_CREATE))
+ if ((ret = __log_init(env, dblp)) != 0)
+ goto err;
+
+ /* Set the local addresses. */
+ lp = dblp->reginfo.primary = R_ADDR(&dblp->reginfo,
+ ((REGENV *)env->reginfo->primary)->lg_primary);
+ dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off);
+
+ /*
+ * If the region is threaded, we have to lock the DBREG list, and we
+ * need to allocate a mutex for that purpose.
+ */
+ if ((ret = __mutex_alloc(env,
+ MTX_LOG_REGION, DB_MUTEX_PROCESS_ONLY, &dblp->mtx_dbreg)) != 0)
+ goto err;
+
+ /*
+ * Set the handle -- we may be about to run recovery, which allocates
+ * log cursors. Log cursors require logging be already configured,
+ * and the handle being set is what demonstrates that.
+ *
+ * If we created the region, run recovery. If that fails, make sure
+ * we reset the log handle before cleaning up, otherwise we will try
+ * and clean up again in the mainline ENV initialization code.
+ */
+ env->lg_handle = dblp;
+
+ if (F_ISSET(&dblp->reginfo, REGION_CREATE)) {
+ /*
+ * We first take the log file size from the environment, if
+ * specified. If that wasn't set, default it. Regardless,
+ * recovery may set it from the persistent information in a
+ * log file header.
+ */
+ if (lp->log_size == 0)
+ lp->log_size =
+ FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ?
+ LG_MAX_INMEM : LG_MAX_DEFAULT;
+
+ if ((ret = __log_recover(dblp)) != 0)
+ goto err;
+
+ /*
+ * If the next log file size hasn't been set yet, default it
+ * to the current log file size.
+ */
+ if (lp->log_nsize == 0)
+ lp->log_nsize = lp->log_size;
+
+ /*
+ * If we haven't written any log files, write the first one
+ * so that checkpoint gets a valid ckp_lsn value.
+ */
+ if (IS_INIT_LSN(lp->lsn) &&
+ (ret = __log_newfile(dblp, NULL, 0, 0)) != 0)
+ goto err;
+
+ /*
+ * Initialize replication's next-expected LSN value
+ * and replication's bulk buffer. In __env_open, we
+ * always create/open the replication region before
+ * the log region so we're assured that our rep_handle
+ * is valid at this point, if replication is being used.
+ */
+ lp->ready_lsn = lp->lsn;
+ if (IS_ENV_REPLICATED(env)) {
+ /* The bulk buffer is MEGABYTE bytes, stored as a region offset. */
+ if ((ret =
+ __env_alloc(&dblp->reginfo, MEGABYTE, &bulk)) != 0)
+ goto err;
+ lp->bulk_buf = R_OFFSET(&dblp->reginfo, bulk);
+ lp->bulk_len = MEGABYTE;
+ lp->bulk_off = 0;
+ lp->wait_ts = env->rep_handle->request_gap;
+ __os_gettime(env, &lp->rcvd_ts, 1);
+ } else {
+ /* No replication: mark the bulk buffer as absent. */
+ lp->bulk_buf = INVALID_ROFF;
+ lp->bulk_len = 0;
+ lp->bulk_off = 0;
+ }
+ } else {
+ /*
+ * A process joining the region may have reset the log file
+ * size, too. If so, it only affects the next log file we
+ * create. We need to check that the size is reasonable given
+ * the buffer size in the region.
+ */
+ LOG_SYSTEM_LOCK(env);
+ region_locked = 1;
+
+ if (dbenv->lg_size != 0) {
+ if ((ret =
+ __log_check_sizes(env, dbenv->lg_size, 0)) != 0)
+ goto err;
+
+ lp->log_nsize = dbenv->lg_size;
+ }
+
+ LOG_SYSTEM_UNLOCK(env);
+ region_locked = 0;
+
+ /*
+ * NOTE(review): unlike every other failure in this function,
+ * this one returns directly instead of going through "err", so
+ * dblp stays allocated and env->lg_handle stays set.
+ * Presumably the caller's failure cleanup releases them --
+ * confirm.
+ */
+ if (dbenv->lg_flags != 0 && (ret =
+ __log_set_config_int(dbenv, dbenv->lg_flags, 1, 0)) != 0)
+ return (ret);
+ }
+ /* Record the region's mutex as this reginfo's allocation mutex. */
+ dblp->reginfo.mtx_alloc = lp->mtx_region;
+
+ return (0);
+
+ /*
+ * Error path: if we attached to the region (addr is non-NULL), drop
+ * the log system lock if still held and detach; then clear the handle
+ * and free the local mutex and the DB_LOG structure.
+ */
+err: if (dblp->reginfo.addr != NULL) {
+ if (region_locked)
+ LOG_SYSTEM_UNLOCK(env);
+ (void)__env_region_detach(env, &dblp->reginfo, 0);
+ }
+ env->lg_handle = NULL;
+
+ (void)__mutex_free(env, &dblp->mtx_dbreg);
+ __os_free(env, dblp);
+
+ return (ret);
+}
+
+/*
+ * __log_init --
+ * Initialize a log region in shared memory.
+ *
+ * Validates the configured sizes, allocates the LOG structure and the
+ * log buffer from the region, records the LOG structure's offset in the
+ * environment's primary region, and fills in the initial LOG fields from
+ * the DB_ENV configuration.
+ *
+ * Returns 0 on success, otherwise the error code of the step that failed.
+ */
+static int
+__log_init(env, dblp)
+ ENV *env;
+ DB_LOG *dblp;
+{
+ DB_ENV *dbenv;
+ LOG *lp;
+ int ret;
+ void *p;
+
+ dbenv = env->dbenv;
+
+ /*
+ * This is the first point where we can validate the buffer size,
+ * because we know all three settings have been configured (file size,
+ * buffer size and the in-memory flag).
+ */
+ if ((ret =
+ __log_check_sizes(env, dbenv->lg_size, dbenv->lg_bsize)) != 0)
+ return (ret);
+
+ /* Allocate the LOG structure itself; mem_err reports the failure. */
+ if ((ret = __env_alloc(&dblp->reginfo,
+ sizeof(*lp), &dblp->reginfo.primary)) != 0)
+ goto mem_err;
+
+ /* Publish the LOG structure's region offset in the REGENV. */
+ ((REGENV *)env->reginfo->primary)->lg_primary =
+ R_OFFSET(&dblp->reginfo, dblp->reginfo.primary);
+
+ lp = dblp->reginfo.primary;
+ memset(lp, 0, sizeof(*lp));
+
+ /* We share the region so we need the same mutex. */
+ lp->mtx_region = ((REGENV *)env->reginfo->primary)->mtx_regenv;
+
+ lp->fid_max = 0;
+ SH_TAILQ_INIT(&lp->fq);
+ lp->free_fid_stack = INVALID_ROFF;
+ lp->free_fids = lp->free_fids_alloced = 0;
+
+ /* Initialize LOG LSNs. */
+ INIT_LSN(lp->lsn);
+ INIT_LSN(lp->t_lsn);
+
+ /*
+ * It's possible to be waiting for an LSN of [1][0], if a replication
+ * client gets the first log record out of order. An LSN of [0][0]
+ * signifies that we're not waiting.
+ */
+ ZERO_LSN(lp->waiting_lsn);
+
+ /*
+ * Log makes note of the fact that it ran into a checkpoint on
+ * startup if it did so, as a recovery optimization. A zero
+ * LSN signifies that it hasn't found one [yet].
+ */
+ ZERO_LSN(lp->cached_ckp_lsn);
+
+ /* Mutex allocation failures return directly, with no message. */
+ if ((ret =
+ __mutex_alloc(env, MTX_LOG_FILENAME, 0, &lp->mtx_filelist)) != 0)
+ return (ret);
+ if ((ret = __mutex_alloc(env, MTX_LOG_FLUSH, 0, &lp->mtx_flush)) != 0)
+ return (ret);
+
+ /* Initialize the buffer. */
+ if ((ret = __env_alloc(&dblp->reginfo, dbenv->lg_bsize, &p)) != 0) {
+ /*
+ * The mem_err label sits inside this block so that the LOG
+ * structure allocation failure above shares the same message.
+ */
+mem_err: __db_errx( env, DB_STR("2524",
+ "unable to allocate log region memory"));
+ return (ret);
+ }
+ /* Copy the DB_ENV log configuration into the shared LOG structure. */
+ lp->regionmax = dbenv->lg_regionmax;
+ lp->buffer_off = R_OFFSET(&dblp->reginfo, p);
+ lp->buffer_size = dbenv->lg_bsize;
+ lp->filemode = dbenv->lg_filemode;
+ lp->log_size = lp->log_nsize = dbenv->lg_size;
+ lp->stat.st_fileid_init = dbenv->lg_fileid_init;
+
+ /* Initialize the commit Queue. */
+ SH_TAILQ_INIT(&lp->free_commits);
+ SH_TAILQ_INIT(&lp->commits);
+ lp->ncommit = 0;
+
+ /* Initialize the logfiles list for in-memory logs. */
+ SH_TAILQ_INIT(&lp->logfiles);
+ SH_TAILQ_INIT(&lp->free_logfiles);
+
+ /*
+ * Fill in the log's persistent header. Don't fill in the log file
+ * sizes, as they may change at any time and so have to be filled in
+ * as each log file is created.
+ */
+ lp->persist.magic = DB_LOGMAGIC;
+ /*
+ * Don't use __log_set_version because env->dblp isn't set up yet.
+ */
+ lp->persist.version = DB_LOGVERSION;
+ lp->persist.notused = 0;
+ /*
+ * NOTE(review): the handle is installed here, before the flag
+ * migration below; presumably __log_set_config_int reads
+ * env->lg_handle -- confirm.
+ */
+ env->lg_handle = dblp;
+
+ /* Migrate persistent flags from the ENV into the region. */
+ if (dbenv->lg_flags != 0 &&
+ (ret = __log_set_config_int(dbenv, dbenv->lg_flags, 1, 1)) != 0)
+ return (ret);
+
+ /* Stamp the region with the current time. */
+ (void)time(&lp->timestamp);
+ return (0);
+}
+
+/*
+ * __log_recover --
+ * Recover a log.
+ *
+ * Locates the last log file and, if it is readable, reads it record by
+ * record to find the end of the log, setting lp->lsn, lp->s_lsn and the
+ * buffer offsets accordingly. Any checkpoint record seen along the way
+ * has its LSN saved in lp->cached_ckp_lsn.
+ *
+ * Returns 0 on success (including when no log files exist), otherwise
+ * the error from __log_find, __log_cursor or the initial __logc_get.
+ */
+static int
+__log_recover(dblp)
+ DB_LOG *dblp;
+{
+ DBT dbt;
+ DB_ENV *dbenv;
+ DB_LOGC *logc;
+ DB_LSN lsn;
+ ENV *env;
+ LOG *lp;
+ u_int32_t cnt, rectype;
+ int ret;
+ logfile_validity status;
+
+ env = dblp->env;
+ dbenv = env->dbenv;
+ logc = NULL;
+ lp = dblp->reginfo.primary;
+
+ /*
+ * Find a log file. If none exist, we simply return, leaving
+ * everything initialized to a new log.
+ */
+ if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0)
+ return (ret);
+ if (cnt == 0) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
+ __db_msg(env, DB_STR("2525", "No log files found"));
+ return (0);
+ }
+
+ /*
+ * If the last file is an old, unreadable version, start a new
+ * file. Don't bother finding the end of the last log file;
+ * we assume that it's valid in its entirety, since the user
+ * should have shut down cleanly or run recovery before upgrading.
+ */
+ if (status == DB_LV_OLD_UNREADABLE) {
+ lp->lsn.file = lp->s_lsn.file = cnt + 1;
+ lp->lsn.offset = lp->s_lsn.offset = 0;
+ /* ret is still 0 here, so skipsearch returns success. */
+ goto skipsearch;
+ }
+ DB_ASSERT(env,
+ (status == DB_LV_NORMAL || status == DB_LV_OLD_READABLE));
+
+ /*
+ * We have the last useful log file and we've loaded any persistent
+ * information. Set the end point of the log past the end of the last
+ * file. Read the last file, looking for the last checkpoint and
+ * the log's end.
+ */
+ lp->lsn.file = cnt + 1;
+ lp->lsn.offset = 0;
+ lsn.file = cnt;
+ lsn.offset = 0;
+
+ /*
+ * Allocate a cursor and set it to the first record. This shouldn't
+ * fail, leave error messages on.
+ */
+ if ((ret = __log_cursor(env, &logc)) != 0)
+ return (ret);
+ F_SET(logc, DB_LOG_LOCKED);
+ memset(&dbt, 0, sizeof(dbt));
+ if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0)
+ goto err;
+
+ /*
+ * Read to the end of the file. This may fail at some point, so
+ * turn off error messages.
+ */
+ F_SET(logc, DB_LOG_SILENT_ERR);
+ /*
+ * The loop's own return values are deliberately not stored in ret:
+ * the first failing read simply marks the end of the log.
+ */
+ while (__logc_get(logc, &lsn, &dbt, DB_NEXT) == 0) {
+ /* Too short to hold a record type; nothing to inspect. */
+ if (dbt.size < sizeof(u_int32_t))
+ continue;
+ LOGCOPY_32(env, &rectype, dbt.data);
+ if (rectype == DB___txn_ckp)
+ /*
+ * If we happen to run into a checkpoint, cache its
+ * LSN so that the transaction system doesn't have
+ * to walk this log file again looking for it.
+ */
+ lp->cached_ckp_lsn = lsn;
+ }
+ F_CLR(logc, DB_LOG_SILENT_ERR);
+
+ /*
+ * We now know where the end of the log is. Set the first LSN that
+ * we want to return to an application and the LSN of the last known
+ * record on disk.
+ */
+ lp->lsn = lsn;
+ lp->s_lsn = lsn;
+ lp->lsn.offset += logc->len;
+ lp->s_lsn.offset += logc->len;
+
+ /* Set up the current buffer information, too. */
+ lp->len = logc->len;
+ lp->a_off = 0;
+ lp->b_off = 0;
+ lp->w_off = lp->lsn.offset;
+
+skipsearch:
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
+ __db_msg(env, DB_STR_A("2526",
+ "Finding last valid log LSN: file: %lu offset %lu",
+ "%lu %lu"), (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
+
+ /* Shared exit: the cursor is closed on success and failure alike. */
+err: if (logc != NULL)
+ (void)__logc_close(logc);
+
+ return (ret);
+}
+
+/*
+ * __log_find --
+ *	Try to find a log file.  If find_first is set, valp will contain
+ * the number of the first readable log file, else it will contain the number
+ * of the last log file (which may be too old to read).
+ *
+ *	dblp		log handle whose region and directory are searched
+ *	find_first	non-zero to look for the oldest file, zero for the newest
+ *	valp		out: chosen log file number, or 0 if none was found
+ *	statusp		out: validity status of the chosen file
+ *			(DB_LV_NONEXISTENT if none was chosen)
+ *
+ *	Returns 0 on success (including "no log files"), otherwise the error
+ *	from __log_name, __os_dirlist or __log_valid.
+ *
+ * PUBLIC: int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *));
+ */
+int
+__log_find(dblp, find_first, valp, statusp)
+	DB_LOG *dblp;
+	int find_first;
+	u_int32_t *valp;
+	logfile_validity *statusp;
+{
+	ENV *env;
+	LOG *lp;
+	logfile_validity logval_status, status;
+	struct __db_filestart *filestart;
+	u_int32_t clv, logval;
+	int cnt, fcnt, ret;
+	const char *dir;
+	char *c, **names, *p, *q;
+
+	env = dblp->env;
+	lp = dblp->reginfo.primary;
+	logval_status = status = DB_LV_NONEXISTENT;
+
+	/* Return a value of 0 as the log file number on failure. */
+	*valp = 0;
+
+	/* In-memory logs: consult the region's list of file markers. */
+	if (lp->db_log_inmemory) {
+		filestart = find_first ?
+		    SH_TAILQ_FIRST(&lp->logfiles, __db_filestart) :
+		    SH_TAILQ_LAST(&lp->logfiles, links, __db_filestart);
+		if (filestart != NULL) {
+			*valp = filestart->file;
+			logval_status = DB_LV_NORMAL;
+		}
+		*statusp = logval_status;
+		return (0);
+	}
+
+	/* Find the directory name. */
+	if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0) {
+		__os_free(env, p);
+		return (ret);
+	}
+	if ((q = __db_rpath(p)) == NULL)
+		dir = PATH_DOT;
+	else {
+		/* Truncate the file name at the last path separator. */
+		*q = '\0';
+		dir = p;
+	}
+
+	/* Get the list of file names. */
+retry:	if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) {
+		__db_err(env, ret, "%s", dir);
+		__os_free(env, p);
+		return (ret);
+	}
+
+	/* Search for a valid log file name. */
+	for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) {
+		if (!IS_LOG_FILE(names[cnt]))
+			continue;
+
+		/*
+		 * Names of the form log\.[0-9]* are reserved for DB.  Other
+		 * names sharing LFPREFIX, such as "log.db", are legal.
+		 *
+		 * The character is converted to unsigned char before being
+		 * handed to isdigit: directory entries may contain bytes with
+		 * the high bit set, and passing a negative value to a
+		 * <ctype.h> function is undefined behavior.
+		 */
+		for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++)
+			if (!isdigit((unsigned char)*c))
+				break;
+		if (*c != '\0')
+			continue;
+
+		/*
+		 * Use atol, not atoi; if an "int" is 16-bits, the largest
+		 * log file name won't fit.
+		 */
+		clv = (u_int32_t)atol(names[cnt] + (sizeof(LFPREFIX) - 1));
+
+		/*
+		 * If searching for the first log file, we want to return the
+		 * oldest log file we can read, or, if no readable log files
+		 * exist, the newest log file we can't read (the crossover
+		 * point between the old and new versions of the log file).
+		 *
+		 * If we're searching for the last log file, we want to return
+		 * the newest log file, period.
+		 *
+		 * Readable log files should never precede unreadable log
+		 * files, that would mean the admin seriously screwed up.
+		 */
+		if (find_first) {
+			if (logval != 0 &&
+			    status != DB_LV_OLD_UNREADABLE && clv > logval)
+				continue;
+		} else
+			if (logval != 0 && clv < logval)
+				continue;
+
+		if ((ret = __log_valid(dblp, clv, 1, NULL, 0,
+		    &status, NULL)) != 0) {
+			/*
+			 * If we have raced with removal of a log file since
+			 * the call to __os_dirlist, it may no longer exist.
+			 * In that case, just go on to the next one.  If we're
+			 * at the end of the list, all of the log files we saw
+			 * initially are gone and we need to get the list again.
+			 */
+			if (ret == ENOENT) {
+				ret = 0;
+				if (cnt == 0) {
+					__os_dirfree(env, names, fcnt);
+					goto retry;
+				}
+				continue;
+			}
+			__db_err(env, ret, DB_STR_A("2527",
+			    "Invalid log file: %s", "%s"), names[cnt]);
+			goto err;
+		}
+		switch (status) {
+		case DB_LV_NONEXISTENT:
+			/* __log_valid never returns DB_LV_NONEXISTENT. */
+			DB_ASSERT(env, 0);
+			break;
+		case DB_LV_INCOMPLETE:
+			/*
+			 * The last log file may not have been initialized --
+			 * it's possible to create a log file but not write
+			 * anything to it.  If performing recovery (that is,
+			 * if find_first isn't set), ignore the file, it's
+			 * not interesting.  If we're searching for the first
+			 * log record, return the file (assuming we don't find
+			 * something better), as the "real" first log record
+			 * is likely to be in the log buffer, and we want to
+			 * set the file LSN for our return.
+			 */
+			if (find_first)
+				goto found;
+			break;
+		case DB_LV_OLD_UNREADABLE:
+			/*
+			 * If we're searching for the first log file, then we
+			 * only want this file if we don't yet have a file or
+			 * already have an unreadable file and this one is
+			 * newer than that one.  If we're searching for the
+			 * last log file, we always want this file because we
+			 * wouldn't be here if it wasn't newer than our current
+			 * choice.
+			 */
+			if (!find_first || logval == 0 ||
+			    (status == DB_LV_OLD_UNREADABLE && clv > logval))
+				goto found;
+			break;
+		case DB_LV_NORMAL:
+		case DB_LV_OLD_READABLE:
+found:			logval = clv;
+			logval_status = status;
+			break;
+		}
+	}
+
+	*valp = logval;
+
+	/* Shared exit: release the directory listing and the path buffer. */
+err:	__os_dirfree(env, names, fcnt);
+	__os_free(env, p);
+	*statusp = logval_status;
+
+	return (ret);
+}
+
+/*
+ * __log_valid --
+ *	Validate a log file.  Returns an error code in the event of
+ * a fatal flaw in the specified log file; returns success with
+ * a code indicating the currentness and completeness of the specified
+ * log file if it is not unexpectedly flawed (that is, if it's perfectly
+ * normal, if it's zero-length, or if it's an old version).
+ *
+ *	dblp		log handle
+ *	number		log file number to validate
+ *	set_persist	non-zero to copy the file's log size and version into
+ *			the region once the header has been validated
+ *	fhpp		if non-NULL, receives the open file handle on success
+ *			(set to NULL otherwise)
+ *	flags		open flags; 0 selects DB_OSO_RDONLY | DB_OSO_SEQ
+ *	statusp		if non-NULL, receives the validity status
+ *	versionp	if non-NULL, receives the log version on success
+ *
+ *	A record-size mismatch and a header-checksum mismatch detected while
+ *	encryption is on are reported as EINVAL, like the other validation
+ *	failures in this function, rather than falling through with a zero
+ *	return.
+ *
+ * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int,
+ * PUBLIC:     DB_FH **, u_int32_t, logfile_validity *, u_int32_t *));
+ */
+int
+__log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp)
+	DB_LOG *dblp;
+	u_int32_t number;
+	int set_persist;
+	DB_FH **fhpp;
+	u_int32_t flags;
+	logfile_validity *statusp;
+	u_int32_t *versionp;
+{
+	DB_CIPHER *db_cipher;
+	DB_FH *fhp;
+	ENV *env;
+	HDR *hdr;
+	LOG *lp;
+	LOGP *persist;
+	logfile_validity status;
+	size_t hdrsize, nr, recsize;
+	int chksum_includes_hdr, is_hmac, ret;
+	u_int32_t logversion;
+	u_int8_t *tmp;
+	char *fname;
+
+	env = dblp->env;
+	db_cipher = env->crypto_handle;
+	fhp = NULL;
+	persist = NULL;
+	status = DB_LV_NORMAL;
+	tmp = NULL;
+#if defined(HAVE_LOG_CHECKSUM)
+	/* Most log versions include the hdr in the checksum. */
+	chksum_includes_hdr = 1;
+#else
+	COMPQUIET(chksum_includes_hdr, 0);
+#endif
+
+	/* Return the file handle to our caller, on request */
+	if (fhpp != NULL)
+		*fhpp = NULL;
+
+	if (flags == 0)
+		flags = DB_OSO_RDONLY | DB_OSO_SEQ;
+	/* Try to open the log file. */
+	if ((ret = __log_name(dblp, number, &fname, &fhp, flags)) != 0) {
+		__os_free(env, fname);
+		return (ret);
+	}
+
+	/* Size the read buffer for the header plus the persistent record. */
+	hdrsize = HDR_NORMAL_SZ;
+	is_hmac = 0;
+	recsize = sizeof(LOGP);
+	if (CRYPTO_ON(env)) {
+		hdrsize = HDR_CRYPTO_SZ;
+		recsize = sizeof(LOGP);
+		recsize += db_cipher->adj_size(recsize);
+		is_hmac = 1;
+	}
+	if ((ret = __os_calloc(env, 1, recsize + hdrsize, &tmp)) != 0)
+		goto err;
+
+	hdr = (HDR *)tmp;
+	persist = (LOGP *)(tmp + hdrsize);
+
+	/*
+	 * Try to read the header.  This can fail if the log is truncated, or
+	 * if we find a preallocated log file where the header has not yet been
+	 * written, so we need to check whether the header is zero-filled.
+	 */
+	if ((ret = __os_read(env, fhp, tmp, recsize + hdrsize, &nr)) != 0 ||
+	    nr != recsize + hdrsize ||
+	    (hdr->len == 0 && persist->magic == 0 && persist->log_size == 0)) {
+		if (ret == 0)
+			status = DB_LV_INCOMPLETE;
+		else
+			/*
+			 * The error was a fatal read error, not just an
+			 * incompletely initialized log file.
+			 */
+			__db_err(env, ret, DB_STR_A("2528",
+			    "ignoring log file: %s", "%s"), fname);
+		goto err;
+	}
+
+	if (LOG_SWAPPED(env))
+		__log_hdrswap(hdr, CRYPTO_ON(env));
+
+	/*
+	 * Now we have to validate the persistent record.  We have
+	 * several scenarios we have to deal with:
+	 *
+	 * 1.  User has crypto turned on:
+	 *	- They're reading an old, unencrypted log file
+	 *	  .  We will fail the record size match check below.
+	 *	- They're reading a current, unencrypted log file
+	 *	  .  We will fail the record size match check below.
+	 *	- They're reading an old, encrypted log file [NOT YET]
+	 *	  .  After decryption we'll fail the version check.  [NOT YET]
+	 *	- They're reading a current, encrypted log file
+	 *	  .  We should proceed as usual.
+	 * 2.  User has crypto turned off:
+	 *	- They're reading an old, unencrypted log file
+	 *	  .  We will fail the version check.
+	 *	- They're reading a current, unencrypted log file
+	 *	  .  We should proceed as usual.
+	 *	- They're reading an old, encrypted log file [NOT YET]
+	 *	  .  We'll fail the magic number check (it is encrypted).
+	 *	- They're reading a current, encrypted log file
+	 *	  .  We'll fail the magic number check (it is encrypted).
+	 */
+	if (CRYPTO_ON(env)) {
+		/*
+		 * If we are trying to decrypt an unencrypted log
+		 * we can only detect that by having an unreasonable
+		 * data length for our persistent data.
+		 */
+		if ((hdr->len - hdrsize) != sizeof(LOGP)) {
+			__db_errx(env, "log record size mismatch");
+			/*
+			 * ret is 0 here (the read succeeded); without an
+			 * explicit error the caller would see success and
+			 * DB_LV_NORMAL for a file we just rejected.
+			 */
+			ret = EINVAL;
+			goto err;
+		}
+		/*
+		 * The checksum is calculated from the encrypted data, and,
+		 * for recent logs, the fields hdr->{prev,len}.
+		 */
+#ifdef HAVE_LOG_CHECKSUM
+		if ((ret = __db_check_chksum(env, hdr, db_cipher,
+		    &hdr->chksum[0], (u_int8_t *)persist,
+		    hdr->len - hdrsize, is_hmac)) != 0) {
+			/*
+			 * The checksum doesn't verify when the header fields
+			 * are included; try without the header.
+			 */
+
+			if ((ret = __db_check_chksum(env, NULL, db_cipher,
+			    &hdr->chksum[0], (u_int8_t *)persist,
+			    hdr->len - hdrsize, is_hmac)) != 0)
+				goto bad_checksum;
+			/*
+			 * The checksum verifies without the header.  Make note
+			 * of that, because it is only acceptable when the log
+			 * version < DB_LOGCHKSUM.  Later, when we determine log
+			 * version, we will confirm this.
+			 */
+			chksum_includes_hdr = 0;
+		}
+#endif
+
+		if ((ret = db_cipher->decrypt(env, db_cipher->data,
+		    &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0)
+			goto err;
+	}
+
+	/* Swap the header, if necessary. */
+	if (LOG_SWAPPED(env)) {
+		/*
+		 * If the magic number is not byte-swapped, we're looking at an
+		 * old log that we can no longer read.
+		 */
+		if (persist->magic == DB_LOGMAGIC) {
+			__db_errx(env, DB_STR_A("2529",
+			    "Ignoring log file: %s historic byte order",
+			    "%s"), fname);
+			status = DB_LV_OLD_UNREADABLE;
+			goto err;
+		}
+
+		__log_persistswap(persist);
+	}
+
+	/* Validate the header. */
+	if (persist->magic != DB_LOGMAGIC) {
+		__db_errx(env, DB_STR_A("2530",
+		    "Ignoring log file: %s: magic number %lx, not %lx",
+		    "%s %lx %lx"), fname,
+		    (u_long)persist->magic, (u_long)DB_LOGMAGIC);
+		ret = EINVAL;
+		goto err;
+	}
+
+	logversion = persist->version;
+	/*
+	 * Set our status code to indicate whether the log file belongs to an
+	 * unreadable or readable old version; leave it alone if and only if
+	 * the log file version is the current one.
+	 */
+	if (logversion > DB_LOGVERSION) {
+		/* This is a fatal error--the log file is newer than DB. */
+		__db_errx(env, DB_STR_A("2531",
+		    "Unacceptable log file %s: unsupported log version %lu",
+		    "%s %lu"), fname, (u_long)logversion);
+		ret = EINVAL;
+		goto err;
+	} else if (logversion < DB_LOGOLDVER) {
+		status = DB_LV_OLD_UNREADABLE;
+		/* This is a non-fatal error, but give some feedback. */
+		__db_errx(env, DB_STR_A("2532",
+		    "Skipping log file %s: historic log version %lu", "%s %lu"),
+		    fname, (u_long)logversion);
+		/*
+		 * We don't want to set persistent info based on an unreadable
+		 * region, so jump to "err".
+		 */
+		goto err;
+	} else if (logversion < DB_LOGVERSION)
+		status = DB_LV_OLD_READABLE;
+
+	/*
+	 * We could not check the checksum before checking the magic and version
+	 * because old log headers put the length and checksum in a different
+	 * location.
+	 */
+#ifdef HAVE_LOG_CHECKSUM
+	if (CRYPTO_ON(env)) {
+		/*
+		 * We might have to declare a checksum failure here, if:
+		 *  - the checksum verified only by ignoring the header, and
+		 *  - the log version indicates that the header should have
+		 *	been included.
+		 */
+		if (!chksum_includes_hdr && logversion >= DB_LOGCHKSUM) {
+			/*
+			 * ret is 0 here (decryption succeeded); set an error
+			 * so the mismatch is not reported as success.
+			 */
+			ret = EINVAL;
+			goto bad_checksum;
+		}
+	} else {
+		/*
+		 * The checksum was calculated with the swapped byte order.  We
+		 * might need to swap them back; the check needs the same bytes.
+		 */
+		if (LOG_SWAPPED(env))
+			__log_persistswap(persist);
+		/*
+		 * We have the logversion here, so we know whether to include
+		 * the hdr or not.
+		 */
+		if ((ret = __db_check_chksum(env,
+		    logversion >= DB_LOGCHKSUM ? hdr : NULL, db_cipher,
+		    &hdr->chksum[0], (u_int8_t *)persist,
+		    hdr->len - hdrsize, is_hmac)) != 0) {
+bad_checksum:
+			__db_errx(env, DB_STR("2533",
+			    "log record checksum mismatch"));
+			goto err;
+		}
+
+		if (LOG_SWAPPED(env))
+			__log_persistswap(persist);
+	}
+#endif
+
+	/*
+	 * If the log is readable so far and we're doing system initialization,
+	 * set the region's persistent information based on the headers.
+	 *
+	 * Override the current log file size.
+	 */
+	if (set_persist) {
+		lp = dblp->reginfo.primary;
+		lp->log_size = persist->log_size;
+		lp->persist.version = logversion;
+	}
+	if (versionp != NULL)
+		*versionp = logversion;
+
+	/* Shared exit for success and failure. */
+err:	if (fname != NULL)
+		__os_free(env, fname);
+	if (ret == 0 && fhpp != NULL)
+		*fhpp = fhp;
+	else
+		/* Must close on error or if we only used it locally. */
+		(void)__os_closehandle(env, fhp);
+	if (tmp != NULL)
+		__os_free(env, tmp);
+
+	if (statusp != NULL)
+		*statusp = status;
+
+	return (ret);
+}
+
+/*
+ * __log_env_refresh --
+ * Clean up after the log system on a close or failed open.
+ *
+ * Every cleanup step runs even if an earlier one failed. The DB_LOG
+ * handle is freed and env->lg_handle is cleared before returning.
+ *
+ * Returns 0 if every step succeeded, otherwise one of the errors
+ * encountered (see the note in the unlogged-close loop below).
+ *
+ * PUBLIC: int __log_env_refresh __P((ENV *));
+ */
+int
+__log_env_refresh(env)
+ ENV *env;
+{
+ DB_LOG *dblp;
+ LOG *lp;
+ REGINFO *reginfo;
+ struct __fname *fnp;
+ struct __db_commit *commit;
+ struct __db_filestart *filestart;
+ int ret, t_ret;
+
+ dblp = env->lg_handle;
+ reginfo = &dblp->reginfo;
+ lp = reginfo->primary;
+ ret = 0;
+
+ /*
+ * Flush the log if it's private -- there's no Berkeley DB guarantee
+ * that this gets done, but in case the application has forgotten to
+ * flush for durability, it's the polite thing to do.
+ */
+ if (F_ISSET(env, ENV_PRIVATE) &&
+ (t_ret = __log_flush(env, NULL)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = __dbreg_close_files(env, 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /*
+ * After we close the files, check for any unlogged closes left in
+ * the shared memory queue. If we find any, try to log it, otherwise
+ * return the error. We cannot say the environment was closed
+ * cleanly.
+ */
+ MUTEX_LOCK(env, lp->mtx_filelist);
+ /*
+ * NOTE(review): this assignment has no "ret == 0" guard, so a failure
+ * here replaces any earlier error, unlike the other steps in this
+ * function.
+ */
+ SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname)
+ if (F_ISSET(fnp, DB_FNAME_NOTLOGGED) &&
+ (t_ret = __dbreg_close_id_int(
+ env, fnp, DBREG_CLOSE, 1)) != 0)
+ ret = t_ret;
+ MUTEX_UNLOCK(env, lp->mtx_filelist);
+
+ /*
+ * If a private region, return the memory to the heap. Not needed for
+ * filesystem-backed or system shared memory regions, that memory isn't
+ * owned by any particular process.
+ */
+ if (F_ISSET(env, ENV_PRIVATE)) {
+ /* Clear the allocation mutex before releasing region memory. */
+ reginfo->mtx_alloc = MUTEX_INVALID;
+ /* Discard the flush mutex. */
+ if ((t_ret =
+ __mutex_free(env, &lp->mtx_flush)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Discard the buffer. */
+ __env_alloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off));
+
+ /* Discard stack of free file IDs. */
+ if (lp->free_fid_stack != INVALID_ROFF)
+ __env_alloc_free(reginfo,
+ R_ADDR(reginfo, lp->free_fid_stack));
+
+ /* Discard the list of in-memory log file markers. */
+ while ((filestart = SH_TAILQ_FIRST(&lp->logfiles,
+ __db_filestart)) != NULL) {
+ SH_TAILQ_REMOVE(&lp->logfiles, filestart, links,
+ __db_filestart);
+ __env_alloc_free(reginfo, filestart);
+ }
+
+ /* Likewise for the markers parked on the free list. */
+ while ((filestart = SH_TAILQ_FIRST(&lp->free_logfiles,
+ __db_filestart)) != NULL) {
+ SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, links,
+ __db_filestart);
+ __env_alloc_free(reginfo, filestart);
+ }
+
+ /* Discard commit queue elements. */
+ while ((commit = SH_TAILQ_FIRST(&lp->free_commits,
+ __db_commit)) != NULL) {
+ SH_TAILQ_REMOVE(&lp->free_commits, commit, links,
+ __db_commit);
+ __env_alloc_free(reginfo, commit);
+ }
+
+ /* Discard replication bulk buffer. */
+ if (lp->bulk_buf != INVALID_ROFF) {
+ __env_alloc_free(reginfo,
+ R_ADDR(reginfo, lp->bulk_buf));
+ lp->bulk_buf = INVALID_ROFF;
+ }
+ }
+
+ /* Discard the per-thread DBREG mutex. */
+ if ((t_ret = __mutex_free(env, &dblp->mtx_dbreg)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Detach from the region. */
+ if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Close open files, release allocated memory. */
+ if (dblp->lfhp != NULL) {
+ if ((t_ret =
+ __os_closehandle(env, dblp->lfhp)) != 0 && ret == 0)
+ ret = t_ret;
+ dblp->lfhp = NULL;
+ }
+ if (dblp->dbentry != NULL)
+ __os_free(env, dblp->dbentry);
+
+ __os_free(env, dblp);
+
+ /* The handle is gone; make sure nobody can reach it through env. */
+ env->lg_handle = NULL;
+ return (ret);
+}
+
+/*
+ * __log_get_cached_ckp_lsn --
+ *	Copy out the checkpoint LSN cached in the log region, if any was
+ *	recorded at startup.  The copy is made while holding the log system
+ *	lock.  Always returns 0.
+ *
+ * PUBLIC: int __log_get_cached_ckp_lsn __P((ENV *, DB_LSN *));
+ */
+int
+__log_get_cached_ckp_lsn(env, ckp_lsnp)
+	ENV *env;
+	DB_LSN *ckp_lsnp;
+{
+	DB_LOG *handle;
+	LOG *region;
+
+	handle = env->lg_handle;
+	region = (LOG *)handle->reginfo.primary;
+
+	/* Read the cached value under the lock so the copy is consistent. */
+	LOG_SYSTEM_LOCK(env);
+	*ckp_lsnp = region->cached_ckp_lsn;
+	LOG_SYSTEM_UNLOCK(env);
+
+	return (0);
+}
+
+/*
+ * __log_region_mutex_count --
+ *	Report how many mutexes the log region needs up front.
+ *
+ * PUBLIC: u_int32_t __log_region_mutex_count __P((ENV *));
+ */
+u_int32_t
+__log_region_mutex_count(env)
+	ENV *env;
+{
+	u_int32_t needed;
+
+	/*
+	 * One mutex per transaction that may wait on the group commit list
+	 * (bounded by the initial transaction count, since the real number
+	 * can't be known), plus five for assorted other uses.
+	 */
+	needed = env->dbenv->tx_init + 5;
+
+	return (needed);
+}
+
+/*
+ * __log_region_mutex_max --
+ *	Report how many mutexes the log region may need beyond the
+ *	initial allotment: the configured maximum transaction count (or
+ *	DEF_MAX_TXNS when unset) less the initial transaction count, never
+ *	less than zero.
+ *
+ * PUBLIC: u_int32_t __log_region_mutex_max __P((ENV *));
+ */
+u_int32_t
+__log_region_mutex_max(env)
+	ENV *env;
+{
+	DB_ENV *cfg;
+	u_int32_t limit;
+
+	cfg = env->dbenv;
+
+	/* An unset maximum falls back to the default. */
+	limit = cfg->tx_max;
+	if (limit == 0)
+		limit = DEF_MAX_TXNS;
+
+	/* The initial count already covers everything: nothing extra. */
+	if (limit < cfg->tx_init)
+		return (0);
+
+	return (limit - cfg->tx_init);
+}
+
+/*
+ * __log_region_size --
+ *	Compute the space the log region needs: the log buffer plus the
+ *	initial file-id allocation with some room for path names.
+ *
+ *	Side effect: if no buffer size was configured, the default (chosen
+ *	by whether logging is in-memory) is stored into dbenv->lg_bsize.
+ *
+ * PUBLIC: size_t __log_region_size __P((ENV *));
+ */
+size_t
+__log_region_size(env)
+	ENV *env;
+{
+	DB_ENV *cfg;
+	size_t total;
+
+	cfg = env->dbenv;
+
+	/* Fill in the default buffer size when none was configured. */
+	if (cfg->lg_bsize == 0) {
+		if (FLD_ISSET(cfg->lg_flags, DB_LOG_IN_MEMORY))
+			cfg->lg_bsize = LG_BSIZE_INMEM;
+		else
+			cfg->lg_bsize = LG_BSIZE_DEFAULT;
+	}
+
+	/* Buffer first, then the per-file-id structures and name space. */
+	total = cfg->lg_bsize;
+	total += cfg->lg_fileid_init * __env_alloc_size((sizeof(FNAME)) + 16);
+
+	return (total);
+}
+/*
+ * __log_region_max --
+ *	Return the amount of extra memory to allocate for logging
+ *	information.
+ *
+ *	With no initial file-id count configured, this is lg_regionmax, or
+ *	LG_BASE_REGION_SIZE when that is unset.  Otherwise it is lg_regionmax
+ *	less the space the initial file-ids take, floored at zero; an
+ *	unset lg_regionmax yields zero.
+ *
+ * PUBLIC: size_t __log_region_max __P((ENV *));
+ */
+size_t
+__log_region_max(env)
+	ENV *env;
+{
+	DB_ENV *cfg;
+	size_t extra, reserved;
+
+	cfg = env->dbenv;
+	extra = cfg->lg_regionmax;
+
+	/* No initial file-ids: use the configured maximum or the default. */
+	if (cfg->lg_fileid_init == 0)
+		return (extra == 0 ? LG_BASE_REGION_SIZE : extra);
+
+	/* Nothing configured, nothing extra. */
+	if (extra == 0)
+		return (0);
+
+	/* Deduct what the initial file-ids consume, never going negative. */
+	reserved = cfg->lg_fileid_init *
+	    (__env_alloc_size(sizeof(FNAME)) + 16);
+	if (extra < reserved)
+		return (0);
+
+	return (extra - reserved);
+}
+
+/*
+ * __log_vtruncate
+ * This is a virtual truncate. We set up the log indicators to
+ * make everyone believe that the given record is the last one in the
+ * log. Returns with the next valid LSN (i.e., the LSN of the next
+ * record to be written). This is used in replication to discard records
+ * in the log file that do not agree with the master.
+ *
+ * lsn LSN of the record that becomes the last one in the log
+ * ckplsn checkpoint LSN used to compute bytes written since checkpoint
+ * trunclsn if non-NULL, receives the new end-of-log LSN
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.
+ *
+ * PUBLIC: int __log_vtruncate __P((ENV *, DB_LSN *, DB_LSN *, DB_LSN *));
+ */
+int
+__log_vtruncate(env, lsn, ckplsn, trunclsn)
+ ENV *env;
+ DB_LSN *lsn, *ckplsn, *trunclsn;
+{
+ DBT log_dbt;
+ DB_LOG *dblp;
+ DB_LOGC *logc;
+ LOG *lp;
+ u_int32_t bytes, len;
+ size_t offset;
+ int ret, t_ret;
+
+ /* Need to find out the length of this soon-to-be-last record. */
+ if ((ret = __log_cursor(env, &logc)) != 0)
+ return (ret);
+ memset(&log_dbt, 0, sizeof(log_dbt));
+ ret = __logc_get(logc, lsn, &log_dbt, DB_SET);
+ /* Save the length before the cursor is closed. */
+ len = logc->len;
+ if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
+ ret = t_ret;
+ if (ret != 0)
+ return (ret);
+
+ /* Now do the truncate. */
+ dblp = env->lg_handle;
+ lp = (LOG *)dblp->reginfo.primary;
+
+ LOG_SYSTEM_LOCK(env);
+
+ /*
+ * Flush the log so we can simply initialize the in-memory buffer
+ * after the truncate.
+ */
+ if ((ret = __log_flush_int(dblp, NULL, 0)) != 0)
+ goto err;
+
+ /* The new end of log is just past the given record. */
+ lp->lsn = *lsn;
+ lp->len = len;
+ lp->lsn.offset += lp->len;
+
+ /*
+ * For in-memory logs, recompute the buffer offset for the new end
+ * of log. Whatever ends up in offset is stored back into b_off,
+ * on the failure path as well.
+ */
+ offset = lp->b_off;
+ if (lp->db_log_inmemory && (ret =
+ __log_inmem_lsnoff(dblp, &lp->lsn, &offset)) != 0) {
+ lp->b_off = (db_size_t)offset;
+ goto err;
+ }
+ lp->b_off = (db_size_t)offset;
+
+ /*
+ * I am going to assume that the number of bytes written since
+ * the last checkpoint doesn't exceed a 32-bit number.
+ */
+ DB_ASSERT(env, lp->lsn.file >= ckplsn->file);
+ bytes = 0;
+ if (ckplsn->file != lp->lsn.file) {
+ /* Rest of the checkpoint's file, whole files between, then ours. */
+ bytes = lp->log_size - ckplsn->offset;
+ if (lp->lsn.file > ckplsn->file + 1)
+ bytes += lp->log_size *
+ ((lp->lsn.file - ckplsn->file) - 1);
+ bytes += lp->lsn.offset;
+ } else
+ bytes = lp->lsn.offset - ckplsn->offset;
+
+ lp->stat.st_wc_mbytes += bytes / MEGABYTE;
+ lp->stat.st_wc_bytes += bytes % MEGABYTE;
+
+ /*
+ * If the synced lsn is greater than our new end of log, reset it
+ * to our current end of log.
+ */
+ MUTEX_LOCK(env, lp->mtx_flush);
+ if (LOG_COMPARE(&lp->s_lsn, lsn) > 0)
+ lp->s_lsn = lp->lsn;
+ MUTEX_UNLOCK(env, lp->mtx_flush);
+
+ /* Initialize the in-region buffer to a pristine state. */
+ ZERO_LSN(lp->f_lsn);
+ lp->w_off = lp->lsn.offset;
+
+ if (trunclsn != NULL)
+ *trunclsn = lp->lsn;
+
+ /* Truncate the log to the new point. */
+ if ((ret = __log_zero(env, &lp->lsn)) != 0)
+ goto err;
+
+ /* Shared exit: the log system lock is released on every path. */
+err: LOG_SYSTEM_UNLOCK(env);
+ return (ret);
+}
+
+/*
+ * __log_is_outdated --
+ *	Used by the replication system to identify if a client's logs are too
+ *	old.
+ *
+ *	Sets *outdatedp non-zero when log file fnum no longer exists and is
+ *	lower-numbered than the logs this environment currently has; sets it
+ *	to 0 otherwise.  Returns 0, or the error from building the file name.
+ *
+ * PUBLIC: int __log_is_outdated __P((ENV *, u_int32_t, int *));
+ */
+int
+__log_is_outdated(env, fnum, outdatedp)
+	ENV *env;
+	u_int32_t fnum;
+	int *outdatedp;
+{
+	DB_LOG *dblp;
+	LOG *region;
+	struct __db_filestart *oldest;
+	u_int32_t curfile;
+	char *path;
+	int ret;
+
+	dblp = env->lg_handle;
+
+	/*
+	 * In-memory logs have no files to probe: compare fnum against the
+	 * first entry on the region's list of log files instead.
+	 */
+	if (FLD_ISSET(env->dbenv->lg_flags, DB_LOG_IN_MEMORY)) {
+		LOG_SYSTEM_LOCK(env);
+		region = (LOG *)dblp->reginfo.primary;
+		oldest = SH_TAILQ_FIRST(&region->logfiles, __db_filestart);
+		if (oldest == NULL)
+			*outdatedp = 0;
+		else
+			*outdatedp = (fnum < oldest->file);
+		LOG_SYSTEM_UNLOCK(env);
+		return (0);
+	}
+
+	*outdatedp = 0;
+	if ((ret = __log_name(dblp, fnum, &path, NULL, 0)) != 0) {
+		__os_free(env, path);
+		return (ret);
+	}
+
+	/*
+	 * An existing file is never outdated.  A missing one is outdated
+	 * only if its number is below the file currently being written;
+	 * otherwise the number is simply too big.
+	 */
+	if (__os_exists(env, path, NULL) != 0) {
+		LOG_SYSTEM_LOCK(env);
+		region = (LOG *)dblp->reginfo.primary;
+		curfile = region->lsn.file;
+		LOG_SYSTEM_UNLOCK(env);
+
+		if (curfile > fnum)
+			*outdatedp = 1;
+	}
+
+	__os_free(env, path);
+	return (ret);
+}
+
+/*
+ * __log_zero --
+ *	Zero out the tail of a log after a truncate.
+ *
+ *	from_lsn is the new end of log.  For in-memory logs the file-start
+ *	entries of every file after from_lsn->file are moved to the free
+ *	list.  For on-disk logs every log file after from_lsn->file is
+ *	unlinked and the remainder of file from_lsn->file, starting at
+ *	from_lsn->offset, is overwritten with zeroes.
+ *
+ *	Returns 0 on success (including the "truncate beyond end of log"
+ *	warning case), otherwise the error from the failing OS call.
+ *
+ * PUBLIC: int __log_zero __P((ENV *, DB_LSN *));
+ */
+int
+__log_zero(env, from_lsn)
+	ENV *env;
+	DB_LSN *from_lsn;
+{
+	DB_FH *fhp;
+	DB_LOG *dblp;
+	LOG *lp;
+	struct __db_filestart *filestart, *nextstart;
+	size_t nbytes, len, nw;
+	u_int32_t fn, fsize, mbytes, bytes;
+	u_int8_t buf[4096];
+	int ret;
+	char *fname;
+
+	dblp = env->lg_handle;
+	lp = (LOG *)dblp->reginfo.primary;
+	DB_ASSERT(env, LOG_COMPARE(from_lsn, &lp->lsn) <= 0);
+	if (LOG_COMPARE(from_lsn, &lp->lsn) > 0) {
+		__db_errx(env, DB_STR("2534",
+		    "Warning: truncating to point beyond end of log"));
+		return (0);
+	}
+
+	if (lp->db_log_inmemory) {
+		/*
+		 * Remove the files that are invalidated by this truncate.
+		 * The successor is fetched before the entry is unlinked.
+		 */
+		for (filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
+		    filestart != NULL; filestart = nextstart) {
+			nextstart = SH_TAILQ_NEXT(filestart,
+			    links, __db_filestart);
+			if (filestart->file > from_lsn->file) {
+				SH_TAILQ_REMOVE(&lp->logfiles,
+				    filestart, links, __db_filestart);
+				SH_TAILQ_INSERT_HEAD(&lp->free_logfiles,
+				    filestart, links, __db_filestart);
+			}
+		}
+
+		return (0);
+	}
+
+	/* Close any open file handles so unlinks don't fail. */
+	if (dblp->lfhp != NULL) {
+		(void)__os_closehandle(env, dblp->lfhp);
+		dblp->lfhp = NULL;
+	}
+
+	/*
+	 * Throw away any extra log files that we have around.  The loop
+	 * ends at the first file number that cannot be opened.
+	 */
+	for (fn = from_lsn->file + 1;; fn++) {
+		if (__log_name(dblp, fn, &fname, &fhp, DB_OSO_RDONLY) != 0) {
+			__os_free(env, fname);
+			break;
+		}
+		(void)__os_closehandle(env, fhp);
+		(void)time(&lp->timestamp);
+		ret = __os_unlink(env, fname, 0);
+		__os_free(env, fname);
+		if (ret != 0)
+			return (ret);
+	}
+
+	/* We removed some log files; have to 0 to end of file. */
+	if ((ret =
+	    __log_name(dblp, from_lsn->file, &fname, &dblp->lfhp, 0)) != 0) {
+		__os_free(env, fname);
+		return (ret);
+	}
+	__os_free(env, fname);
+	if ((ret = __os_ioinfo(env,
+	    NULL, dblp->lfhp, &mbytes, &bytes, NULL)) != 0)
+		goto err;
+
+	/*
+	 * The file is expected to reach at least from_lsn->offset.  The
+	 * assertion is compiled out of non-diagnostic builds, where a
+	 * shorter file would make the unsigned subtraction wrap and request
+	 * gigabytes of zero writes, so clamp the length at run time too.
+	 */
+	fsize = mbytes * MEGABYTE + bytes;
+	DB_ASSERT(env, fsize >= from_lsn->offset);
+	len = fsize > from_lsn->offset ? fsize - from_lsn->offset : 0;
+
+	memset(buf, 0, sizeof(buf));
+
+	/* Initialize the write position. */
+	if ((ret = __os_seek(env, dblp->lfhp, 0, 0, from_lsn->offset)) != 0)
+		goto err;
+
+	/* Write zeroes a buffer at a time until the end of the file. */
+	while (len > 0) {
+		nbytes = len > sizeof(buf) ? sizeof(buf) : len;
+		if ((ret =
+		    __os_write(env, dblp->lfhp, buf, nbytes, &nw)) != 0)
+			goto err;
+		len -= nbytes;
+	}
+
+	/* The handle is closed on both the success and the failure path. */
+err:	(void)__os_closehandle(env, dblp->lfhp);
+	dblp->lfhp = NULL;
+
+	return (ret);
+}
+
+/*
+ * __log_inmem_lsnoff --
+ *	Find the offset in the buffer of a given LSN.
+ *
+ *	On success stores the offset through offsetp and returns 0.  If no
+ *	in-memory file matches lsnp->file, returns DB_NOTFOUND and leaves
+ *	*offsetp untouched.
+ *
+ * PUBLIC: int __log_inmem_lsnoff __P((DB_LOG *, DB_LSN *, size_t *));
+ */
+int
+__log_inmem_lsnoff(dblp, lsnp, offsetp)
+	DB_LOG *dblp;
+	DB_LSN *lsnp;
+	size_t *offsetp;
+{
+	LOG *region;
+	struct __db_filestart *fs;
+
+	region = (LOG *)dblp->reginfo.primary;
+
+	SH_TAILQ_FOREACH(fs, &region->logfiles, links, __db_filestart) {
+		if (fs->file != lsnp->file)
+			continue;
+
+		/* File start plus offset within the file, wrapped. */
+		*offsetp = (u_int32_t)
+		    (fs->b_off + lsnp->offset) % region->buffer_size;
+		return (0);
+	}
+
+	return (DB_NOTFOUND);
+}
+
+/*
+ * __log_inmem_newfile --
+ * Records the offset of the beginning of a new file in the in-memory
+ * buffer.
+ *
+ * file is the number of the log file that starts at the current
+ * buffer offset (lp->b_off). Returns 0 on success, or the error from
+ * __env_alloc if a new file-start entry cannot be allocated.
+ *
+ * PUBLIC: int __log_inmem_newfile __P((DB_LOG *, u_int32_t));
+ */
+int
+__log_inmem_newfile(dblp, file)
+ DB_LOG *dblp;
+ u_int32_t file;
+{
+ HDR hdr;
+ LOG *lp;
+ struct __db_filestart *filestart;
+ int ret;
+#ifdef DIAGNOSTIC
+ struct __db_filestart *first, *last;
+#endif
+
+ lp = (LOG *)dblp->reginfo.primary;
+
+ /*
+ * If the log buffer is empty, reuse the filestart entry.
+ * "Empty" here means the span from the first file's start to the
+ * current offset is no larger than sizeof(HDR) + sizeof(LOGP).
+ */
+ filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
+ if (filestart != NULL &&
+ RINGBUF_LEN(lp, filestart->b_off, lp->b_off) <=
+ sizeof(HDR) + sizeof(LOGP)) {
+ filestart->file = file;
+ filestart->b_off = lp->b_off;
+ return (0);
+ }
+
+ /*
+ * We write an empty header at the end of every in-memory log file.
+ * This is used during cursor traversal to indicate when to switch the
+ * LSN to the next file.
+ */
+ if (file > 1) {
+ memset(&hdr, 0, sizeof(HDR));
+ __log_inmem_copyin(dblp, lp->b_off, &hdr, sizeof(HDR));
+ lp->b_off = (lp->b_off + sizeof(HDR)) % lp->buffer_size;
+ }
+
+ /*
+ * Take a file-start entry from the free list if one is available,
+ * otherwise allocate a zeroed one from the log region.
+ */
+ filestart = SH_TAILQ_FIRST(&lp->free_logfiles, __db_filestart);
+ if (filestart == NULL) {
+ if ((ret = __env_alloc(&dblp->reginfo,
+ sizeof(struct __db_filestart), &filestart)) != 0)
+ return (ret);
+ memset(filestart, 0, sizeof(*filestart));
+ } else
+ SH_TAILQ_REMOVE(&lp->free_logfiles, filestart,
+ links, __db_filestart);
+
+ filestart->file = file;
+ filestart->b_off = lp->b_off;
+
+#ifdef DIAGNOSTIC
+ /* Examined before the new entry is linked onto the list below. */
+ first = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
+ last = SH_TAILQ_LAST(&(lp)->logfiles, links, __db_filestart);
+
+ /* Check that we don't wrap. */
+ DB_ASSERT(dblp->env, !first || first == last ||
+ RINGBUF_LEN(lp, first->b_off, lp->b_off) ==
+ RINGBUF_LEN(lp, first->b_off, last->b_off) +
+ RINGBUF_LEN(lp, last->b_off, lp->b_off));
+#endif
+
+ SH_TAILQ_INSERT_TAIL(&lp->logfiles, filestart, links);
+ return (0);
+}
+
+/*
+ * __log_inmem_chkspace --
+ * Ensure that the requested amount of space is available in the buffer,
+ * and invalidate the region.
+ * Note: assumes that the region lock is held on entry.
+ *
+ * The region lock is dropped and re-acquired while the oldest active
+ * LSN is fetched from the transaction subsystem, so it is held again
+ * on return but may have been released in between.
+ *
+ * Returns 0 when the space is available, DB_LOG_BUFFER_FULL when an
+ * active transaction prevents any more space from being reclaimed, or
+ * the error returned by __txn_getactive.
+ *
+ * PUBLIC: int __log_inmem_chkspace __P((DB_LOG *, size_t));
+ */
+int
+__log_inmem_chkspace(dblp, len)
+ DB_LOG *dblp;
+ size_t len;
+{
+ DB_LSN active_lsn, old_active_lsn;
+ ENV *env;
+ LOG *lp;
+ struct __db_filestart *filestart;
+ size_t offset;
+ int ret;
+
+ env = dblp->env;
+ lp = dblp->reginfo.primary;
+
+ DB_ASSERT(env, lp->db_log_inmemory);
+
+ /*
+ * Allow room for an extra header so that we don't need to check for
+ * space when switching files.
+ */
+ len += sizeof(HDR);
+
+ /*
+ * If transactions are enabled and we're about to fill available space,
+ * update the active LSN and recheck. If transactions aren't enabled,
+ * don't even bother checking: in that case we can always overwrite old
+ * log records, because we're never going to abort.
+ */
+ while (TXN_ON(env) &&
+ RINGBUF_LEN(lp, lp->b_off, lp->a_off) <= len) {
+ old_active_lsn = lp->active_lsn;
+ active_lsn = lp->lsn;
+
+ /*
+ * Drop the log region lock so we don't hold it while
+ * taking the transaction region lock.
+ */
+ LOG_SYSTEM_UNLOCK(env);
+ ret = __txn_getactive(env, &active_lsn);
+ LOG_SYSTEM_LOCK(env);
+ if (ret != 0)
+ return (ret);
+ /*
+ * Only the file number of the active LSN is kept.
+ * NOTE(review): presumably because space is reclaimed a whole
+ * file at a time -- confirm.
+ */
+ active_lsn.offset = 0;
+
+ /* If we didn't make any progress, give up. */
+ if (LOG_COMPARE(&active_lsn, &old_active_lsn) == 0) {
+ __db_errx(env, DB_STR("2535",
+"In-memory log buffer is full (an active transaction spans the buffer)"));
+ return (DB_LOG_BUFFER_FULL);
+ }
+
+ /*
+ * Make sure we're moving the region LSN forwards.
+ * offset starts as the current a_off and __log_inmem_lsnoff
+ * only stores to it on success, so ignoring its return value
+ * leaves a_off unchanged when the file is not found.
+ */
+ if (LOG_COMPARE(&active_lsn, &lp->active_lsn) > 0) {
+ lp->active_lsn = active_lsn;
+ offset = lp->a_off;
+ (void)__log_inmem_lsnoff(dblp, &active_lsn, &offset);
+ lp->a_off = (db_size_t)offset;
+ }
+ }
+
+ /*
+ * Remove the first file if it is invalidated by this write.
+ * Log records can't be bigger than a file, so we only need to
+ * check the first file.
+ */
+ filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
+ if (filestart != NULL &&
+ RINGBUF_LEN(lp, lp->b_off, filestart->b_off) <= len) {
+ SH_TAILQ_REMOVE(&lp->logfiles, filestart,
+ links, __db_filestart);
+ SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, filestart,
+ links, __db_filestart);
+ /* The oldest file still in the buffer is now the next one. */
+ lp->f_lsn.file = filestart->file + 1;
+ }
+
+ return (0);
+}
+
+/*
+ * __log_inmem_copyout --
+ *	Copies the given number of bytes from the buffer -- no checking.
+ *	A read that runs past the end of the buffer continues from its
+ *	beginning.
+ *	Note: assumes that the region lock is held on entry.
+ *
+ * PUBLIC: void __log_inmem_copyout __P((DB_LOG *, size_t, void *, size_t));
+ */
+void
+__log_inmem_copyout(dblp, offset, buf, size)
+	DB_LOG *dblp;
+	size_t offset;
+	void *buf;
+	size_t size;
+{
+	LOG *region;
+	size_t head, tail;
+
+	region = (LOG *)dblp->reginfo.primary;
+
+	/* Bytes taken from offset up to the physical end of the buffer. */
+	if (offset + size < region->buffer_size)
+		head = size;
+	else
+		head = region->buffer_size - offset;
+	memcpy(buf, dblp->bufp + offset, head);
+
+	/* Whatever is left comes from the start of the buffer. */
+	if (head < size) {
+		tail = size - head;
+		memcpy((u_int8_t *)buf + head, dblp->bufp, tail);
+	}
+}
+
+/*
+ * __log_inmem_copyin --
+ *	Copies the given number of bytes into the buffer -- no checking.
+ *	A write that runs past the end of the buffer continues at its
+ *	beginning.
+ *	Note: assumes that the region lock is held on entry.
+ *
+ * PUBLIC: void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t));
+ */
+void
+__log_inmem_copyin(dblp, offset, buf, size)
+	DB_LOG *dblp;
+	size_t offset;
+	void *buf;
+	size_t size;
+{
+	LOG *region;
+	size_t head, tail;
+
+	region = (LOG *)dblp->reginfo.primary;
+
+	/* Bytes stored from offset up to the physical end of the buffer. */
+	if (offset + size < region->buffer_size)
+		head = size;
+	else
+		head = region->buffer_size - offset;
+	memcpy(dblp->bufp + offset, buf, head);
+
+	/* Whatever is left goes to the start of the buffer. */
+	if (head < size) {
+		tail = size - head;
+		memcpy(dblp->bufp, (u_int8_t *)buf + head, tail);
+	}
+}
+
+/*
+ * __log_set_version --
+ *	Sets the current version of the log subsystem to the given version.
+ *	Essentially this modifies the lp->persist.version field in the
+ *	shared memory region.  Called when region is initially created
+ *	and when replication is starting up or finds a new master.
+ *
+ * PUBLIC: void __log_set_version __P((ENV *, u_int32_t));
+ */
+void
+__log_set_version(env, newver)
+	ENV *env;
+	u_int32_t newver;
+{
+	LOG *region;
+
+	region = (LOG *)env->lg_handle->reginfo.primary;
+
+	/* A single store; no lock is taken for it. */
+	region->persist.version = newver;
+}
+
+/*
+ * __log_get_oldversion --
+ *	Returns the last version of log that this environment was working
+ *	with.  Since there could be several versions of log files, if
+ *	the user upgraded and didn't log archive, we check the version
+ *	of the first log file, compare it to the last log file.  If those
+ *	are different, then there is an older log existing, and we then
+ *	walk backward in the log files looking for the version of the
+ *	most recent older log file.
+ *
+ *	On success the version is stored through ver, which may be NULL if
+ *	the caller only wants the return code.  With in-memory logs, or
+ *	when there is no log file at all, the version is DB_LOGVERSION.
+ *	Returns 0 on success, otherwise the error from the log cursor or
+ *	from __log_valid.
+ *
+ * PUBLIC: int __log_get_oldversion __P((ENV *, u_int32_t *));
+ */
+int
+__log_get_oldversion(env, ver)
+	ENV *env;
+	u_int32_t *ver;
+{
+	DBT rec;
+	DB_LOG *dblp;
+	DB_LOGC *logc;
+	DB_LSN lsn;
+	LOG *lp;
+	u_int32_t firstfnum, fnum, lastver, oldver;
+	int ret, t_ret;
+
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	logc = NULL;
+	ret = 0;
+	oldver = DB_LOGVERSION;
+	/*
+	 * If we're in-memory logs we're always the current version.  Leave
+	 * through the common exit so that a NULL ver is tolerated here
+	 * exactly as it is on every other path.
+	 */
+	if (lp->db_log_inmemory)
+		goto err;
+
+	memset(&rec, 0, sizeof(rec));
+	if ((ret = __log_cursor(env, &logc)) != 0)
+		goto err;
+	/*
+	 * Get the version numbers of the first and last log files.
+	 */
+	if ((ret = __logc_get(logc, &lsn, &rec, DB_FIRST)) != 0) {
+		/*
+		 * If there is no log file, we'll get DB_NOTFOUND.
+		 * If we get that, set the version to the current.
+		 */
+		if (ret == DB_NOTFOUND)
+			ret = 0;
+		goto err;
+	}
+	firstfnum = lsn.file;
+	if ((ret = __logc_get(logc, &lsn, &rec, DB_LAST)) != 0)
+		goto err;
+	if ((ret = __log_valid(dblp, firstfnum, 0, NULL, 0,
+	    NULL, &oldver)) != 0)
+		goto err;
+	/*
+	 * If the first and last LSN are in the same file, then we
+	 * already have the version in oldver.  Return it.
+	 */
+	if (firstfnum == lsn.file)
+		goto err;
+
+	/*
+	 * Otherwise they're in different files and we call __log_valid
+	 * to get the version numbers in both files.
+	 */
+	if ((ret = __log_valid(dblp, lsn.file, 0, NULL, 0,
+	    NULL, &lastver)) != 0)
+		goto err;
+	/*
+	 * If the version numbers are different, walk backward getting
+	 * the version of each log file until we find one that is
+	 * different than the last.
+	 */
+	if (oldver != lastver) {
+		for (fnum = lsn.file - 1; fnum >= firstfnum; fnum--) {
+			if ((ret = __log_valid(dblp, fnum, 0, NULL, 0,
+			    NULL, &oldver)) != 0)
+				goto err;
+			if (oldver != lastver)
+				break;
+		}
+	}
+
+	/* Common exit: close the cursor if one was opened, report oldver. */
+err:	if (logc != NULL && ((t_ret = __logc_close(logc)) != 0) && ret == 0)
+		ret = t_ret;
+	if (ret == 0 && ver != NULL)
+		*ver = oldver;
+	return (ret);
+}
diff --git a/src/log/log_archive.c b/src/log/log_archive.c
new file mode 100644
index 00000000..280a2071
--- /dev/null
+++ b/src/log/log_archive.c
@@ -0,0 +1,643 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/log.h"
+#include "dbinc/qam.h"
+#include "dbinc/txn.h"
+
+static int __absname __P((ENV *, char *, char *, char **));
+static int __build_data __P((ENV *, char *, char ***));
+static int __cmpfunc __P((const void *, const void *));
+static int __usermem __P((ENV *, char ***));
+
+/*
+ * __log_archive_pp --
+ *	ENV->log_archive pre/post processing.
+ *
+ *	Validates the flag combination, then runs __log_archive inside the
+ *	environment enter/leave and replication wrappers.
+ *
+ * PUBLIC: int __log_archive_pp __P((DB_ENV *, char **[], u_int32_t));
+ */
+int
+__log_archive_pp(dbenv, listp, flags)
+	DB_ENV *dbenv;
+	char ***listp;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int ret;
+
+	env = dbenv->env;
+
+	ENV_REQUIRES_CONFIG(env,
+	    env->lg_handle, "DB_ENV->log_archive", DB_INIT_LOG);
+
+#undef	OKFLAGS
+#define	OKFLAGS	(DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG | DB_ARCH_REMOVE)
+	/*
+	 * The checks run in order and stop at the first failure: unknown
+	 * flags, then DB_ARCH_DATA combined with DB_ARCH_LOG, then
+	 * DB_ARCH_REMOVE combined with any other flag.
+	 */
+	if (flags != 0 &&
+	    ((ret = __db_fchk(
+	    env, "DB_ENV->log_archive", flags, OKFLAGS)) != 0 ||
+	    (ret = __db_fcchk(env, "DB_ENV->log_archive",
+	    flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0 ||
+	    (ret = __db_fcchk(env, "DB_ENV->log_archive",
+	    flags, DB_ARCH_REMOVE,
+	    DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG)) != 0))
+		return (ret);
+
+	ENV_ENTER(env, ip);
+	REPLICATION_WRAP(env, (__log_archive(env, listp, flags)), 0, ret);
+	ENV_LEAVE(env, ip);
+	return (ret);
+}
+
+/*
+ * __log_archive --
+ * ENV->log_archive. Internal.
+ *
+ * What is done depends on flags (after DB_ARCH_ABS is stripped):
+ * DB_ARCH_DATA returns the list built by __build_data; DB_ARCH_LOG
+ * lists log files starting at the file of the last log record and
+ * working down; DB_ARCH_REMOVE calls __log_autoremove and returns no
+ * list; no flag lists the log files numbered below the stable LSN's
+ * file. DB_ARCH_ABS asks for names prefixed with the current
+ * directory.
+ *
+ * Except for DB_ARCH_REMOVE, *listp is set to NULL on entry and, when
+ * there is something to return, to a NULL-terminated array reworked by
+ * __usermem. Returns 0 on success or an error code.
+ *
+ * PUBLIC: int __log_archive __P((ENV *, char **[], u_int32_t));
+ */
+int
+__log_archive(env, listp, flags)
+ ENV *env;
+ char ***listp;
+ u_int32_t flags;
+{
+ DBT rec;
+ DB_LOG *dblp;
+ DB_LOGC *logc;
+ DB_LSN stable_lsn;
+ LOG *lp;
+ u_int array_size, n;
+ u_int32_t fnum;
+ int handle_check, ret, t_ret;
+ char **array, **arrayp, *name, *p, *pref;
+#ifdef HAVE_GETCWD
+ char path[DB_MAXPATHLEN];
+#endif
+
+ dblp = env->lg_handle;
+ lp = (LOG *)dblp->reginfo.primary;
+ array = NULL;
+ name = NULL;
+ ret = 0;
+ COMPQUIET(fnum, 0);
+
+ if (flags != DB_ARCH_REMOVE)
+ *listp = NULL;
+
+ /*
+ * There are no log files if logs are in memory.
+ * Every flag other than DB_ARCH_DATA is dropped; if nothing is left
+ * there is nothing to do.
+ */
+ if (lp->db_log_inmemory) {
+ LF_CLR(~DB_ARCH_DATA);
+ if (flags == 0)
+ return (0);
+ }
+
+ /*
+ * Check if the user wants the list of log files to remove and we're
+ * at a bad time in replication initialization.
+ */
+ handle_check = 0;
+ if (!LF_ISSET(DB_ARCH_DATA) &&
+ !LF_ISSET(DB_ARCH_LOG)) {
+ /*
+ * If we're locked out, just return success. No files
+ * can be archived right now. Any other error pass back
+ * to the caller.
+ */
+ handle_check = IS_ENV_REPLICATED(env);
+ if (handle_check && (ret = __archive_rep_enter(env)) != 0) {
+ if (ret == DB_REP_LOCKOUT)
+ ret = 0;
+ return (ret);
+ }
+ }
+
+ /*
+ * Prepend the original absolute pathname if the user wants an
+ * absolute path to the database environment directory.
+ */
+#ifdef HAVE_GETCWD
+ if (LF_ISSET(DB_ARCH_ABS)) {
+ /*
+ * XXX
+ * Can't trust getcwd(3) to set a valid errno, so don't display
+ * one unless we know it's good. It's likely a permissions
+ * problem: use something bland and useless in the default
+ * return value, so we don't send somebody off in the wrong
+ * direction.
+ */
+ __os_set_errno(0);
+ if (getcwd(path, sizeof(path)) == NULL) {
+ ret = __os_get_errno();
+ __db_err(env, ret, DB_STR("2570",
+ "no absolute path for the current directory"));
+ goto err;
+ }
+ pref = path;
+ } else
+#endif
+ pref = NULL;
+
+ /* DB_ARCH_ABS is recorded in pref; the switch sees the other flags. */
+ LF_CLR(DB_ARCH_ABS);
+ switch (flags) {
+ case DB_ARCH_DATA:
+ ret = __build_data(env, pref, listp);
+ goto err;
+ case DB_ARCH_LOG:
+ memset(&rec, 0, sizeof(rec));
+ if ((ret = __log_cursor(env, &logc)) != 0)
+ goto err;
+#ifdef UMRW
+ ZERO_LSN(stable_lsn);
+#endif
+ /* Start from the file that holds the last log record. */
+ ret = __logc_get(logc, &stable_lsn, &rec, DB_LAST);
+ if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
+ ret = t_ret;
+ if (ret != 0)
+ goto err;
+ fnum = stable_lsn.file;
+ break;
+ case DB_ARCH_REMOVE:
+ __log_autoremove(env);
+ goto err;
+ case 0:
+
+ ret = __log_get_stable_lsn(env, &stable_lsn, 1);
+ /*
+ * A return of DB_NOTFOUND means the checkpoint LSN
+ * is before the beginning of the log files we have.
+ * This is not an error; it just means we're done.
+ */
+ if (ret != 0) {
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ goto err;
+ }
+ /* Remove any log files before the last stable LSN. */
+ fnum = stable_lsn.file - 1;
+ break;
+ default:
+ ret = __db_unknown_path(env, "__log_archive");
+ goto err;
+ }
+
+#define LIST_INCREMENT 64
+ /* Get some initial space. */
+ array_size = 64;
+ if ((ret = __os_malloc(env,
+ sizeof(char *) * array_size, &array)) != 0)
+ goto err;
+ array[0] = NULL;
+
+ /*
+ * Build an array of the file names.
+ * File numbers are walked downward from fnum; the walk stops at the
+ * first file that does not exist, except that with DB_ARCH_LOG a
+ * missing file for the last record's own file number is skipped.
+ */
+ for (n = 0; fnum > 0; --fnum) {
+ if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) {
+ __os_free(env, name);
+ goto err;
+ }
+ if (__os_exists(env, name, NULL) != 0) {
+ __os_free(env, name);
+ name = NULL;
+ if (LF_ISSET(DB_ARCH_LOG) && fnum == stable_lsn.file)
+ continue;
+ break;
+ }
+
+ /* Grow early so the new entry and the terminator always fit. */
+ if (n >= array_size - 2) {
+ array_size += LIST_INCREMENT;
+ if ((ret = __os_realloc(env,
+ sizeof(char *) * array_size, &array)) != 0)
+ goto err;
+ }
+
+ /*
+ * Store an absolute name, the name with its leading path
+ * stripped, or the name itself (ownership moves to the array).
+ */
+ if (pref != NULL) {
+ if ((ret =
+ __absname(env, pref, name, &array[n])) != 0)
+ goto err;
+ __os_free(env, name);
+ } else if ((p = __db_rpath(name)) != NULL) {
+ if ((ret = __os_strdup(env, p + 1, &array[n])) != 0)
+ goto err;
+ __os_free(env, name);
+ } else
+ array[n] = name;
+
+ name = NULL;
+ array[++n] = NULL;
+ }
+
+ /* If there's nothing to return, we're done. */
+ if (n == 0)
+ goto err;
+
+ /* Sort the list. */
+ qsort(array, (size_t)n, sizeof(char *), __cmpfunc);
+
+ /* Rework the memory. */
+ if ((ret = __usermem(env, &array)) != 0)
+ goto err;
+
+ if (listp != NULL)
+ *listp = array;
+
+ /*
+ * The cleanup inside the block only runs when it is entered through
+ * the err label; the success path falls past it.
+ */
+ if (0) {
+err: if (array != NULL) {
+ for (arrayp = array; *arrayp != NULL; ++arrayp)
+ __os_free(env, *arrayp);
+ __os_free(env, array);
+ }
+ if (name != NULL)
+ __os_free(env, name);
+ }
+ if (handle_check && (t_ret = __archive_rep_exit(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __log_get_stable_lsn --
+ *	Get the stable lsn based on where checkpoints are.
+ *
+ *	Starting from the most recent checkpoint, walks the chain of
+ *	checkpoint records backwards until one is found whose LSN is below
+ *	the log region's s_lsn, and returns that record's ckp_lsn through
+ *	stable_lsn.  Returns DB_NOTFOUND if no checkpoint can be located.
+ *
+ * PUBLIC: int __log_get_stable_lsn __P((ENV *, DB_LSN *, int));
+ */
+int
+__log_get_stable_lsn(env, stable_lsn, group_wide)
+	ENV *env;
+	DB_LSN *stable_lsn;
+	int group_wide;
+{
+	DBT rec;
+	DB_LOGC *logc;
+	LOG *lp;
+	__txn_ckp_args *ckp_args;
+	int on_disk, ret, t_ret;
+
+	lp = env->lg_handle->reginfo.primary;
+
+	ret = 0;
+	memset(&rec, 0, sizeof(rec));
+
+	if (TXN_ON(env)) {
+		if ((ret = __txn_getckp(env, stable_lsn)) != 0)
+			goto err;
+	} else {
+		if ((ret = __log_get_cached_ckp_lsn(env, stable_lsn)) != 0)
+			goto err;
+		/*
+		 * No need to check for a return value of DB_NOTFOUND;
+		 * __txn_findlastckp returns 0 if no checkpoint record
+		 * is found.  Instead of checking the return value, we
+		 * check to see if the return LSN has been filled in.
+		 */
+		if (IS_ZERO_LSN(*stable_lsn) && (ret =
+		    __txn_findlastckp(env, stable_lsn, NULL)) != 0)
+			goto err;
+		/*
+		 * If the LSN has not been filled in return DB_NOTFOUND
+		 * so that the caller knows it may be done.
+		 */
+		if (IS_ZERO_LSN(*stable_lsn)) {
+			ret = DB_NOTFOUND;
+			goto err;
+		}
+	}
+
+	if ((ret = __log_cursor(env, &logc)) != 0)
+		goto err;
+
+	/*
+	 * Read checkpoint records until we find one that is on disk,
+	 * then copy the ckp_lsn to the stable_lsn; until then keep
+	 * following last_ckp to the previous checkpoint.
+	 */
+	for (;;) {
+		if ((ret = __logc_get(logc, stable_lsn, &rec, DB_SET)) != 0 ||
+		    (ret = __txn_ckp_read(env, rec.data, &ckp_args)) != 0)
+			break;
+
+		on_disk = LOG_COMPARE(stable_lsn, &lp->s_lsn) < 0;
+		*stable_lsn =
+		    on_disk ? ckp_args->ckp_lsn : ckp_args->last_ckp;
+		__os_free(env, ckp_args);
+		if (on_disk)
+			break;
+	}
+	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
+		ret = t_ret;
+#ifdef HAVE_REPLICATION_THREADS
+	/*
+	 * If we have RepMgr, get the minimum group-aware LSN.
+	 */
+	if (group_wide && ret == 0 && REP_ON(env) && APP_IS_REPMGR(env) &&
+	    (t_ret = __repmgr_stable_lsn(env, stable_lsn)) != 0)
+		ret = t_ret;
+#else
+	COMPQUIET(group_wide, 0);
+#endif
+err:
+	return (ret);
+}
+
+/*
+ * __log_autoremove --
+ *	Delete any non-essential log files.
+ *
+ *	Asks __log_archive for the absolute names of the removable log
+ *	files and unlinks each of them, ignoring unlink failures.
+ *
+ * PUBLIC: void __log_autoremove __P((ENV *));
+ */
+void
+__log_autoremove(env)
+	ENV *env;
+{
+	int ret;
+	char **files, **fp;
+
+	/*
+	 * Complain if there's an error, but don't return the error to our
+	 * caller.  Auto-remove is done when writing a log record, and we
+	 * don't want to fail a write, which could fail the corresponding
+	 * committing transaction, for a permissions error.
+	 */
+	if ((ret = __log_archive(env, &files, DB_ARCH_ABS)) != 0) {
+		if (ret != DB_NOTFOUND)
+			__db_err(env, ret, DB_STR("2571",
+			    "log file auto-remove"));
+		return;
+	}
+
+	/* Nothing is returned when there is nothing to remove. */
+	if (files == NULL)
+		return;
+
+	/* Remove the files, then release the list itself. */
+	for (fp = files; *fp != NULL; ++fp)
+		(void)__os_unlink(env, *fp, 0);
+	__os_ufree(env, files);
+}
+
+/*
+ * __build_data --
+ *	Build a list of datafiles for return.
+ *
+ *	Walks the log backwards collecting the file name of every
+ *	__dbreg_register record (plus the extent names of queue databases),
+ *	sorts the names, drops duplicates and files that no longer exist,
+ *	and rewrites each survivor either as an absolute path (pref != NULL)
+ *	or with its leading path stripped.
+ *
+ *	On success *listp is NULL if no names were collected, otherwise a
+ *	NULL-terminated array reworked by __usermem.  Returns 0 or an error
+ *	code; on error every intermediate allocation is released.
+ */
+static int
+__build_data(env, pref, listp)
+	ENV *env;
+	char *pref, ***listp;
+{
+	DBT rec;
+	DB_LOGC *logc;
+	DB_LSN lsn;
+	__dbreg_register_args *argp;
+	u_int array_size, last, n, nxt;
+	u_int32_t rectype;
+	int ret, t_ret;
+	char **array, **arrayp, **list, **lp, *p, *real_name;
+
+	/* Get some initial space. */
+	array_size = 64;
+	if ((ret = __os_malloc(env,
+	    sizeof(char *) * array_size, &array)) != 0)
+		return (ret);
+	array[0] = NULL;
+
+	memset(&rec, 0, sizeof(rec));
+	/* The (still empty) array must not be leaked if this fails. */
+	if ((ret = __log_cursor(env, &logc)) != 0)
+		goto err1;
+	for (n = 0; (ret = __logc_get(logc, &lsn, &rec, DB_PREV)) == 0;) {
+		if (rec.size < sizeof(rectype)) {
+			ret = EINVAL;
+			__db_errx(env, DB_STR("2572",
+			    "DB_ENV->log_archive: bad log record"));
+			break;
+		}
+
+		/* Only file registration records name database files. */
+		LOGCOPY_32(env, &rectype, rec.data);
+		if (rectype != DB___dbreg_register)
+			continue;
+		if ((ret =
+		    __dbreg_register_read(env, rec.data, &argp)) != 0) {
+			ret = EINVAL;
+			__db_errx(env, DB_STR("2573",
+			    "DB_ENV->log_archive: unable to read log record"));
+			break;
+		}
+
+		/* Grow early so the new entry and the terminator fit. */
+		if (n >= array_size - 2) {
+			array_size += LIST_INCREMENT;
+			if ((ret = __os_realloc(env,
+			    sizeof(char *) * array_size, &array)) != 0)
+				goto free_continue;
+		}
+
+		if ((ret = __os_strdup(env,
+		    argp->name.data, &array[n++])) != 0)
+			goto free_continue;
+		array[n] = NULL;
+
+		if (argp->ftype == DB_QUEUE) {
+			/*
+			 * Start from a known value so the cleanup at q_err
+			 * never tests an indeterminate pointer.
+			 */
+			list = NULL;
+			if ((ret = __qam_extent_names(env,
+			    argp->name.data, &list)) != 0)
+				goto q_err;
+			for (lp = list;
+			    lp != NULL && *lp != NULL; lp++) {
+				if (n >= array_size - 2) {
+					array_size += LIST_INCREMENT;
+					if ((ret = __os_realloc(env,
+					    sizeof(char *) *
+					    array_size, &array)) != 0)
+						goto q_err;
+				}
+				if ((ret =
+				    __os_strdup(env, *lp, &array[n++])) != 0)
+					goto q_err;
+				array[n] = NULL;
+			}
+q_err:			if (list != NULL)
+				__os_free(env, list);
+		}
+free_continue:	__os_free(env, argp);
+		if (ret != 0)
+			break;
+	}
+	/* Running off the beginning of the log is the normal way out. */
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
+		ret = t_ret;
+	if (ret != 0)
+		goto err1;
+
+	/* If there's nothing to return, we're done. */
+	if (n == 0) {
+		ret = 0;
+		*listp = NULL;
+		goto err1;
+	}
+
+	/* Sort the list. */
+	qsort(array, (size_t)n, sizeof(char *), __cmpfunc);
+
+	/*
+	 * Build the real pathnames, discarding nonexistent files and
+	 * duplicates.
+	 */
+	for (last = nxt = 0; nxt < n;) {
+		/*
+		 * Discard duplicates.  Last is the next slot we're going
+		 * to return to the user, nxt is the next slot that we're
+		 * going to consider.
+		 */
+		if (last != nxt) {
+			array[last] = array[nxt];
+			array[nxt] = NULL;
+		}
+		for (++nxt; nxt < n &&
+		    strcmp(array[last], array[nxt]) == 0; ++nxt) {
+			__os_free(env, array[nxt]);
+			array[nxt] = NULL;
+		}
+
+		/* Get the real name. */
+		if ((ret = __db_appname(env,
+		    DB_APP_DATA, array[last], NULL, &real_name)) != 0)
+			goto err2;
+
+		/* If the file doesn't exist, ignore it. */
+		if (__os_exists(env, real_name, NULL) != 0) {
+			__os_free(env, real_name);
+			__os_free(env, array[last]);
+			array[last] = NULL;
+			continue;
+		}
+
+		/* Rework the name as requested by the user. */
+		__os_free(env, array[last]);
+		array[last] = NULL;
+		if (pref != NULL) {
+			ret = __absname(env, pref, real_name, &array[last]);
+			__os_free(env, real_name);
+			if (ret != 0)
+				goto err2;
+		} else if ((p = __db_rpath(real_name)) != NULL) {
+			ret = __os_strdup(env, p + 1, &array[last]);
+			__os_free(env, real_name);
+			if (ret != 0)
+				goto err2;
+		} else
+			array[last] = real_name;
+		++last;
+	}
+
+	/* NULL-terminate the list. */
+	array[last] = NULL;
+
+	/* Rework the memory. */
+	if ((ret = __usermem(env, &array)) != 0)
+		goto err1;
+
+	*listp = array;
+	return (0);
+
+err2:	/*
+	 * The slots from nxt onwards have not been processed yet, and the
+	 * err1 walk below may stop at an earlier NULL before reaching them,
+	 * so release them here.  Each slot is cleared after it is freed:
+	 * when no NULL separates the processed entries from the tail, err1
+	 * would otherwise walk into the tail and free it a second time.
+	 */
+	if (array != NULL)
+		for (; nxt < n; ++nxt) {
+			__os_free(env, array[nxt]);
+			array[nxt] = NULL;
+		}
+	/* FALLTHROUGH */
+
+err1:	if (array != NULL) {
+		for (arrayp = array; *arrayp != NULL; ++arrayp)
+			__os_free(env, *arrayp);
+		__os_free(env, array);
+	}
+	return (ret);
+}
+
+/*
+ * __absname --
+ *	Return an absolute path name for the file.
+ *
+ *	Stores in *newnamep a newly allocated string holding name itself
+ *	when name is already an absolute path, otherwise pref followed by a
+ *	path separator (unless pref already ends in one) and name.  An empty
+ *	pref contributes nothing.  The caller owns the returned string.
+ *	Returns 0, or the error from __os_malloc, in which case *newnamep is
+ *	not written.
+ */
+static int
+__absname(env, pref, name, newnamep)
+	ENV *env;
+	char *pref, *name, **newnamep;
+{
+	size_t l_pref, l_name;
+	int isabspath, ret;
+	char *newname;
+
+	l_name = strlen(name);
+	isabspath = __os_abspath(name);
+	l_pref = isabspath ? 0 : strlen(pref);
+
+	/* Room for the prefix, one separator, the name and the NUL. */
+	if ((ret = __os_malloc(env,
+	    l_pref + l_name + 2, &newname)) != 0)
+		return (ret);
+	*newnamep = newname;
+
+	/*
+	 * Build the name.  If `name' is an absolute path, ignore any prefix.
+	 * The prefix length is checked before its last character is looked
+	 * at, so an empty prefix cannot cause a read before the buffer.
+	 */
+	if (!isabspath && l_pref > 0) {
+		memcpy(newname, pref, l_pref);
+		if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL)
+			newname[l_pref++] = PATH_SEPARATOR[0];
+	}
+	memcpy(newname + l_pref, name, l_name + 1);
+
+	return (0);
+}
+
+/*
+ * __usermem --
+ *	Create a single chunk of memory that holds the returned information.
+ *	If the user has their own malloc routine, use it.
+ *
+ *	The chunk holds the NULL-terminated pointer array followed by the
+ *	strings it points to.  On success the original strings and array are
+ *	freed and *listp is replaced; on failure *listp is left untouched.
+ */
+static int
+__usermem(env, listp)
+	ENV *env;
+	char ***listp;
+{
+	size_t nbytes, nentries, slen;
+	int ret;
+	char **dst, **newlist, **src, *strp;
+
+	/* One pointer per entry plus the terminator, then the strings. */
+	nentries = 0;
+	nbytes = sizeof(char *);
+	for (src = *listp; *src != NULL; ++src) {
+		nbytes += sizeof(char *) + strlen(*src) + 1;
+		++nentries;
+	}
+
+	/* Allocate it and set up the pointers. */
+	if ((ret = __os_umalloc(env, nbytes, &newlist)) != 0)
+		return (ret);
+
+	/* String storage begins right after the pointer array. */
+	strp = (char *)(newlist + nentries + 1);
+
+	/* Copy the original information into the new memory. */
+	dst = newlist;
+	for (src = *listp; *src != NULL; ++src) {
+		slen = strlen(*src) + 1;
+		memcpy(strp, *src, slen);
+		*dst++ = strp;
+		strp += slen;
+
+		__os_free(env, *src);
+	}
+
+	/* NULL-terminate the list. */
+	*dst = NULL;
+
+	__os_free(env, *listp);
+	*listp = newlist;
+
+	return (0);
+}
+
+/*
+ * __cmpfunc --
+ *	qsort(3) comparison routine for an array of C strings.
+ */
+static int
+__cmpfunc(p1, p2)
+	const void *p1, *p2;
+{
+	char * const *s1, * const *s2;
+
+	/* Each element is a pointer to a string. */
+	s1 = p1;
+	s2 = p2;
+	return (strcmp(*s1, *s2));
+}
diff --git a/src/log/log_compare.c b/src/log/log_compare.c
new file mode 100644
index 00000000..97b59338
--- /dev/null
+++ b/src/log/log_compare.c
@@ -0,0 +1,66 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/log.h"
+
+/*
+ * log_compare --
+ *	Compare two LSN's; return 1, 0, -1 if first is >, == or < second.
+ *	Function form of the LOG_COMPARE macro.
+ *
+ * EXTERN: int log_compare __P((const DB_LSN *, const DB_LSN *));
+ */
+int
+log_compare(lsn0, lsn1)
+	const DB_LSN *lsn0, *lsn1;
+{
+	int cmp;
+
+	cmp = LOG_COMPARE(lsn0, lsn1);
+	return (cmp);
+}
+
+/*
+ * __log_check_page_lsn --
+ *	Complain if the page's lsn is past the end of the current log.
+ *
+ *	Returns 0 when lsnp is below the current end of log.  Otherwise
+ *	reports the offending file (dbp may be NULL, in which case the file
+ *	is reported as unknown) together with both LSNs and returns EINVAL.
+ *
+ * PUBLIC: int __log_check_page_lsn __P((ENV *, DB *, DB_LSN *));
+ */
+int
+__log_check_page_lsn(env, dbp, lsnp)
+	ENV *env;
+	DB *dbp;
+	DB_LSN *lsnp;
+{
+	DB_LSN end_lsn;
+	LOG *lp;
+	int ret;
+
+	lp = env->lg_handle->reginfo.primary;
+
+	/*
+	 * Take a snapshot of the end of log while the region is locked, so
+	 * that the message below reports the very LSN the comparison used
+	 * rather than whatever the log has advanced to in the meantime.
+	 */
+	LOG_SYSTEM_LOCK(env);
+	end_lsn = lp->lsn;
+	LOG_SYSTEM_UNLOCK(env);
+
+	ret = LOG_COMPARE(lsnp, &end_lsn);
+	if (ret < 0)
+		return (0);
+
+	__db_errx(env, DB_STR_A("2506",
+	    "file %s has LSN %lu/%lu, past end of log at %lu/%lu",
+	    "%s %lu %lu %lu %lu"),
+	    dbp == NULL ||
+	    dbp->fname == NULL ? DB_STR_P("unknown") : dbp->fname,
+	    (u_long)lsnp->file, (u_long)lsnp->offset,
+	    (u_long)end_lsn.file, (u_long)end_lsn.offset);
+	__db_errx(env, DB_STR("2507",
+    "Commonly caused by moving a database from one database environment"));
+	__db_errx(env, DB_STR("2508",
+    "to another without clearing the database LSNs, or by removing all of"));
+	__db_errx(env, DB_STR("2509",
+	    "the log files from a database environment"));
+	return (EINVAL);
+}
diff --git a/src/log/log_debug.c b/src/log/log_debug.c
new file mode 100644
index 00000000..32fb2542
--- /dev/null
+++ b/src/log/log_debug.c
@@ -0,0 +1,146 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+
+static int __log_printf_int __P((ENV *, DB_TXN *, const char *, va_list));
+
+/*
+ * __log_printf_capi --
+ *	Variadic front end for writing a printf-style message into the DB
+ *	log: gathers the trailing arguments into a va_list and hands them
+ *	to __log_printf_pp, whose result is returned unchanged.
+ *
+ * PUBLIC: int __log_printf_capi __P((DB_ENV *, DB_TXN *, const char *, ...))
+ * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4)));
+ */
+int
+#ifdef STDC_HEADERS
+__log_printf_capi(DB_ENV *dbenv, DB_TXN *txnid, const char *fmt, ...)
+#else
+__log_printf_capi(dbenv, txnid, fmt, va_alist)
+	DB_ENV *dbenv;
+	DB_TXN *txnid;
+	const char *fmt;
+	va_dcl
+#endif
+{
+	va_list args;
+	int rval;
+
+	/* The va_alist/va_dcl form of va_start takes no "last named" argument. */
+#ifndef STDC_HEADERS
+	va_start(args);
+#else
+	va_start(args, fmt);
+#endif
+	rval = __log_printf_pp(dbenv, txnid, fmt, args);
+	va_end(args);
+
+	return (rval);
+}
+
+/*
+ * __log_printf_pp --
+ *	Handle the arguments and call an internal routine to do the work.
+ *
+ *	The reason this routine isn't just folded into __log_printf_capi
+ *	is because the C++ API has to call a C API routine, and you can
+ *	only pass variadic arguments to a single routine.
+ *
+ *	The va_list is owned by the caller: whoever invoked va_start must
+ *	also invoke va_end, so this routine only reads from ap.  (Calling
+ *	va_end here as well would end the list twice, and the early-return
+ *	paths inside the macros below never reached it anyway.)
+ *
+ *	Returns the result of the configuration check, the environment
+ *	entry/replication macros, or __log_printf_int.
+ *
+ * PUBLIC: int __log_printf_pp
+ * PUBLIC: __P((DB_ENV *, DB_TXN *, const char *, va_list));
+ */
+int
+__log_printf_pp(dbenv, txnid, fmt, ap)
+	DB_ENV *dbenv;
+	DB_TXN *txnid;
+	const char *fmt;
+	va_list ap;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int ret;
+
+	env = dbenv->env;
+
+	/* Logging must have been configured for this environment. */
+	ENV_REQUIRES_CONFIG(env,
+	    env->lg_handle, "DB_ENV->log_printf", DB_INIT_LOG);
+
+	ENV_ENTER(env, ip);
+	REPLICATION_WRAP(env, (__log_printf_int(env, txnid, fmt, ap)), 0, ret);
+	ENV_LEAVE(env, ip);
+	return (ret);
+}
+
+/*
+ * __log_printf --
+ *	Internal (ENV-based) variadic front end for writing a printf-style
+ *	message into the DB log: gathers the trailing arguments into a
+ *	va_list and calls __log_printf_int directly.
+ *
+ * PUBLIC: int __log_printf __P((ENV *, DB_TXN *, const char *, ...))
+ * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4)));
+ */
+int
+#ifdef STDC_HEADERS
+__log_printf(ENV *env, DB_TXN *txnid, const char *fmt, ...)
+#else
+__log_printf(env, txnid, fmt, va_alist)
+	ENV *env;
+	DB_TXN *txnid;
+	const char *fmt;
+	va_dcl
+#endif
+{
+	va_list args;
+	int rval;
+
+	/* The va_alist/va_dcl form of va_start takes no "last named" argument. */
+#ifndef STDC_HEADERS
+	va_start(args);
+#else
+	va_start(args, fmt);
+#endif
+	rval = __log_printf_int(env, txnid, fmt, args);
+	va_end(args);
+
+	return (rval);
+}
+
+/*
+ * __log_printf_int --
+ *	Write a printf-style format string into the DB log (internal).
+ *
+ *	The message is formatted into a fixed 2048-byte stack buffer and
+ *	logged through __db_debug_log with the operation name "DIAGNOSTIC".
+ *	Messages that do not fit are truncated to the buffer.
+ *
+ *	Returns EAGAIN if logging is not currently permitted, EINVAL if the
+ *	message could not be formatted, otherwise the result of
+ *	__db_debug_log.
+ */
+static int
+__log_printf_int(env, txnid, fmt, ap)
+	ENV *env;
+	DB_TXN *txnid;
+	const char *fmt;
+	va_list ap;
+{
+	DBT opdbt, msgdbt;
+	DB_LSN lsn;
+	int len;
+	char __logbuf[2048];	/* !!!: END OF THE STACK DON'T TRUST SPRINTF. */
+
+	if (!DBENV_LOGGING(env)) {
+		__db_errx(env, DB_STR("2510",
+		    "Logging not currently permitted"));
+		return (EAGAIN);
+	}
+
+	memset(&opdbt, 0, sizeof(opdbt));
+	opdbt.data = "DIAGNOSTIC";
+	opdbt.size = sizeof("DIAGNOSTIC") - 1;
+
+	/*
+	 * vsnprintf returns the length the complete message would have had,
+	 * which is larger than what was stored when the output was
+	 * truncated, and a negative value on error.  Never advertise more
+	 * bytes than the buffer actually holds, otherwise the log write
+	 * would read past the end of __logbuf.
+	 */
+	len = vsnprintf(__logbuf, sizeof(__logbuf), fmt, ap);
+	if (len < 0)
+		return (EINVAL);
+	if ((size_t)len >= sizeof(__logbuf))
+		len = (int)sizeof(__logbuf) - 1;
+
+	memset(&msgdbt, 0, sizeof(msgdbt));
+	msgdbt.data = __logbuf;
+	msgdbt.size = (u_int32_t)len;
+
+	return (__db_debug_log(
+	    env, txnid, &lsn, 0, &opdbt, -1, &msgdbt, NULL, 0));
+}
diff --git a/src/log/log_get.c b/src/log/log_get.c
new file mode 100644
index 00000000..db30c969
--- /dev/null
+++ b/src/log/log_get.c
@@ -0,0 +1,1626 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hmac.h"
+#include "dbinc/log.h"
+#include "dbinc/hash.h"
+
+typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK;
+
+static int __logc_close_pp __P((DB_LOGC *, u_int32_t));
+static int __logc_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
+static int __logc_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
+static int __logc_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *));
+static int __logc_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **));
+static int __logc_inregion __P((DB_LOGC *,
+ DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **, int *));
+static int __logc_io __P((DB_LOGC *,
+ u_int32_t, u_int32_t, void *, size_t *, int *));
+static int __logc_ondisk __P((DB_LOGC *,
+ DB_LSN *, DB_LSN *, u_int32_t, HDR *, u_int8_t **, int *));
+static int __logc_set_maxrec __P((DB_LOGC *, char *));
+static int __logc_shortread __P((DB_LOGC *, DB_LSN *, int));
+static int __logc_version_pp __P((DB_LOGC *, u_int32_t *, u_int32_t));
+
+/*
+ * __log_cursor_pp --
+ *	ENV->log_cursor: argument checking and environment entry/exit
+ *	around __log_cursor.  No flag bits are accepted.
+ *
+ * PUBLIC: int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t));
+ */
+int
+__log_cursor_pp(dbenv, logcp, flags)
+	DB_ENV *dbenv;
+	DB_LOGC **logcp;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *thr;
+	ENV *env;
+	int rval;
+
+	env = dbenv->env;
+
+	/* Logging must have been configured for this environment. */
+	ENV_REQUIRES_CONFIG(env,
+	    env->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG);
+
+	/* Reject any flag bits. */
+	rval = __db_fchk(env, "DB_ENV->log_cursor", flags, 0);
+	if (rval != 0)
+		return (rval);
+
+	ENV_ENTER(env, thr);
+	REPLICATION_WRAP(env, (__log_cursor(env, logcp)), 0, rval);
+	ENV_LEAVE(env, thr);
+	return (rval);
+}
+
+/*
+ * __log_cursor --
+ *	Allocate and initialize a log cursor.
+ *
+ *	*logcp is set to NULL up front and to the new cursor only on
+ *	success.  The cursor starts with an LG_CURSOR_BUF_SIZE read buffer
+ *	and its close/get/version methods wired to the _pp wrappers.
+ *	Returns 0 or the allocation error.
+ *
+ * PUBLIC: int __log_cursor __P((ENV *, DB_LOGC **));
+ */
+int
+__log_cursor(env, logcp)
+	ENV *env;
+	DB_LOGC **logcp;
+{
+	DB_LOGC *cursor;
+	int rval;
+
+	*logcp = NULL;
+
+	/* The cursor structure itself, zero-filled. */
+	rval = __os_calloc(env, 1, sizeof(DB_LOGC), &cursor);
+	if (rval != 0)
+		return (rval);
+
+	/* The maximum record size only needs to start out positive. */
+	cursor->bp_maxrec = MEGABYTE;
+
+	/* The cursor's record buffer; undo the first allocation on failure. */
+	cursor->bp_size = LG_CURSOR_BUF_SIZE;
+	rval = __os_malloc(env, cursor->bp_size, &cursor->bp);
+	if (rval != 0) {
+		__os_free(env, cursor);
+		return (rval);
+	}
+
+	cursor->env = env;
+	cursor->close = __logc_close_pp;
+	cursor->get = __logc_get_pp;
+	cursor->version = __logc_version_pp;
+
+	*logcp = cursor;
+	return (0);
+}
+
+/*
+ * __logc_close_pp --
+ *	DB_LOGC->close: argument checking and environment entry/exit
+ *	around __logc_close.  No flag bits are accepted.
+ */
+static int
+__logc_close_pp(logc, flags)
+	DB_LOGC *logc;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *thr;
+	ENV *env;
+	int rval;
+
+	env = logc->env;
+
+	rval = __db_fchk(env, "DB_LOGC->close", flags, 0);
+	if (rval != 0)
+		return (rval);
+
+	ENV_ENTER(env, thr);
+	REPLICATION_WRAP(env, (__logc_close(logc)), 0, rval);
+	ENV_LEAVE(env, thr);
+	return (rval);
+}
+
+/*
+ * __logc_close --
+ *	DB_LOGC->close: release everything the cursor owns -- the open log
+ *	file handle (if any), the internal return-data buffer (if any), the
+ *	read buffer, and finally the cursor itself.  Always returns 0; a
+ *	failure closing the file handle is deliberately ignored.
+ *
+ * PUBLIC: int __logc_close __P((DB_LOGC *));
+ */
+int
+__logc_close(logc)
+	DB_LOGC *logc;
+{
+	ENV *env;
+
+	env = logc->env;
+
+	if (logc->fhp != NULL) {
+		(void)__os_closehandle(env, logc->fhp);
+		logc->fhp = NULL;
+	}
+
+	/* The return-data buffer is only present once a record was copied. */
+	if (logc->dbt.data != NULL)
+		__os_free(env, logc->dbt.data);
+	__os_free(env, logc->bp);
+
+	/* The cursor goes last; nothing may touch logc after this. */
+	__os_free(env, logc);
+	return (0);
+}
+
+/*
+ * __logc_version_pp --
+ *	DB_LOGC->version: argument checking and environment entry/exit
+ *	around __logc_version.  No flag bits are accepted.
+ */
+static int
+__logc_version_pp(logc, versionp, flags)
+	DB_LOGC *logc;
+	u_int32_t *versionp;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *thr;
+	ENV *env;
+	int rval;
+
+	env = logc->env;
+
+	rval = __db_fchk(env, "DB_LOGC->version", flags, 0);
+	if (rval != 0)
+		return (rval);
+
+	ENV_ENTER(env, thr);
+	REPLICATION_WRAP(env, (__logc_version(logc, versionp)), 0, rval);
+	ENV_LEAVE(env, thr);
+	return (rval);
+}
+
+/*
+ * __logc_version --
+ *	DB_LOGC->version: report the log version of the file the cursor is
+ *	positioned in.
+ *
+ *	The cursor must already be positioned (non-zero LSN), otherwise
+ *	EINVAL is returned.  *versionp is written only on success.
+ *
+ * PUBLIC: int __logc_version __P((DB_LOGC *, u_int32_t *));
+ */
+int
+__logc_version(logc, versionp)
+	DB_LOGC *logc;
+	u_int32_t *versionp;
+{
+	DBT hdrdbt;
+	DB_LOGC *hdr_cursor;
+	DB_LSN hdr_lsn;
+	ENV *env;
+	LOGP *persist;
+	int ret, t_ret;
+
+	env = logc->env;
+	if (IS_ZERO_LSN(logc->lsn)) {
+		__db_errx(env, DB_STR("2574", "DB_LOGC->get: unset cursor"));
+		return (EINVAL);
+	}
+
+	/*
+	 * The persist information is cached per log file.  Most users walk
+	 * forward through the log when using this feature (i.e. printlog),
+	 * so the cached copy usually already describes the current file.
+	 */
+	if (logc->lsn.file == logc->p_lsn.file) {
+		*versionp = logc->p_version;
+		return (0);
+	}
+
+	/*
+	 * Cache miss: open a scratch cursor and read the header record at
+	 * offset 0 of the current file.
+	 */
+	if ((ret = __log_cursor(env, &hdr_cursor)) != 0)
+		return (ret);
+	hdr_lsn.file = logc->lsn.file;
+	hdr_lsn.offset = 0;
+	hdr_cursor->lsn = hdr_lsn;
+	memset(&hdrdbt, 0, sizeof(DBT));
+	ret = __logc_get_int(hdr_cursor, &hdr_lsn, &hdrdbt, DB_SET);
+	if (ret == 0) {
+		persist = (LOGP *)hdrdbt.data;
+		if (LOG_SWAPPED(env))
+			__log_persistswap(persist);
+		logc->p_lsn = logc->lsn;
+		logc->p_version = persist->version;
+	}
+
+	/* Closing the scratch cursor must not mask an earlier error. */
+	t_ret = __logc_close(hdr_cursor);
+	if (t_ret != 0 && ret == 0)
+		ret = t_ret;
+
+	/* Return the version. */
+	if (ret == 0)
+		*versionp = logc->p_version;
+	return (ret);
+}
+
+/*
+ * __logc_get_pp --
+ *	DB_LOGC->get: argument checking and environment entry/exit around
+ *	__logc_get.  Exactly one of DB_CURRENT, DB_FIRST, DB_LAST, DB_NEXT,
+ *	DB_PREV or DB_SET is accepted; DB_SET additionally requires a
+ *	non-zero LSN in *alsn.
+ */
+static int
+__logc_get_pp(logc, alsn, dbt, flags)
+	DB_LOGC *logc;
+	DB_LSN *alsn;
+	DBT *dbt;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *thr;
+	ENV *env;
+	int rval;
+
+	env = logc->env;
+
+	/* Validate arguments. */
+	switch (flags) {
+	case DB_SET:
+		/* An explicit position has to name a real LSN. */
+		if (!IS_ZERO_LSN(*alsn))
+			break;
+		__db_errx(env, DB_STR_A("2575",
+		    "DB_LOGC->get: invalid LSN: %lu/%lu", "%lu %lu"),
+		    (u_long)alsn->file, (u_long)alsn->offset);
+		return (EINVAL);
+	case DB_CURRENT:
+	case DB_FIRST:
+	case DB_LAST:
+	case DB_NEXT:
+	case DB_PREV:
+		break;
+	default:
+		return (__db_ferr(env, "DB_LOGC->get", 1));
+	}
+
+	ENV_ENTER(env, thr);
+	REPLICATION_WRAP(env, (__logc_get(logc, alsn, dbt, flags)), 0, rval);
+	ENV_LEAVE(env, thr);
+	return (rval);
+}
+
+/*
+ * __logc_get --
+ * DB_LOGC->get.
+ *
+ * Saves the caller's *alsn, fetches a record with __logc_get_int and
+ * puts the saved value back if that fetch fails. When a DB_FIRST,
+ * DB_NEXT, DB_LAST or DB_PREV request lands on offset 0 of a file,
+ * the record is treated as the log file's persist header: its version
+ * is cached in logc->p_lsn/logc->p_version and the fetch is repeated
+ * once more (DB_FIRST continuing as DB_NEXT, DB_LAST as DB_PREV) to
+ * step past it.
+ *
+ * Returns 0 on success, otherwise the error from __logc_get_int or
+ * __dbt_usercopy.
+ *
+ * PUBLIC: int __logc_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
+ */
+int
+__logc_get(logc, alsn, dbt, flags)
+ DB_LOGC *logc;
+ DB_LSN *alsn;
+ DBT *dbt;
+ u_int32_t flags;
+{
+ DB_LSN saved_lsn;
+ ENV *env;
+ LOGP *persist;
+ int ret;
+
+ env = logc->env;
+
+ /*
+ * On error, we take care not to overwrite the caller's LSN. This
+ * is because callers looking for the end of the log loop using the
+ * DB_NEXT flag, and expect to take the last successful lsn out of
+ * the passed-in structure after DB_LOGC->get fails with DB_NOTFOUND.
+ *
+ * !!!
+ * This line is often flagged an uninitialized memory read during a
+ * Purify or similar tool run, as the application didn't initialize
+ * *alsn. If the application isn't setting the DB_SET flag, there is
+ * no reason it should have initialized *alsn, but we can't know that
+ * and we want to make sure we never overwrite whatever the application
+ * put in there.
+ */
+ saved_lsn = *alsn;
+ /*
+ * If we get one of the log's header records as a result of doing a
+ * DB_FIRST, DB_NEXT, DB_LAST or DB_PREV, repeat the operation, log
+ * file header records aren't useful to applications.
+ */
+ if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) {
+ *alsn = saved_lsn;
+ return (ret);
+ }
+ /*
+ * The DBT was populated by the call to __logc_get_int, copy the data
+ * out of DB_DBT_USERMEM space if it is there.
+ */
+ if ((ret = __dbt_usercopy(env, dbt)) != 0)
+ return (ret);
+
+ if (alsn->offset == 0 && (flags == DB_FIRST ||
+ flags == DB_NEXT || flags == DB_LAST || flags == DB_PREV)) {
+ switch (flags) {
+ case DB_FIRST:
+ flags = DB_NEXT;
+ break;
+ case DB_LAST:
+ flags = DB_PREV;
+ break;
+ case DB_NEXT:
+ case DB_PREV:
+ default:
+ break;
+ }
+ /*
+ * If we're walking the log and we find a persist header
+ * then store so that we may use it later if needed.
+ */
+ persist = (LOGP *)dbt->data;
+ if (LOG_SWAPPED(env))
+ __log_persistswap(persist);
+ logc->p_lsn = *alsn;
+ logc->p_version = persist->version;
+ if (F_ISSET(dbt, DB_DBT_MALLOC)) { /* header copy is not handed to the caller */
+ __os_free(env, dbt->data);
+ dbt->data = NULL;
+ }
+ if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) {
+ *alsn = saved_lsn;
+ goto err;
+ }
+ }
+
+err: __dbt_userfree(env, dbt, NULL, NULL);
+ return (ret);
+}
+
+/*
+ * __logc_get_int --
+ * Get a log record; internal version.
+ *
+ * Works out the LSN of the requested record (nlsn) from flags and the
+ * cursor's current position, then looks for that record in three
+ * places, in order: the cursor's own buffer (__logc_incursor), the log
+ * region's buffer (__logc_inregion) and the on-disk log file
+ * (__logc_ondisk). Records found in the cursor's buffer or read from
+ * disk go through the checksum step when HAVE_LOG_CHECKSUM is defined;
+ * records taken from the region's buffer do so only when
+ * __logc_inregion asks for it through need_cksum.
+ *
+ * On success the record body (without its header) is copied into dbt,
+ * decrypted in place when CRYPTO_ON(env), and *alsn plus the cursor's
+ * lsn/len/prev fields are set to describe the record. On failure
+ * *alsn and the cursor's lsn/len/prev are not written by this
+ * function, except on the log-verification skip path, which advances
+ * the cursor before retrying at nextrec.
+ *
+ * The log region lock is acquired only when needed and only if the
+ * cursor does not already hold it (DB_LOG_LOCKED); a lock acquired
+ * here is always released before returning.
+ *
+ * Returns 0 on success, DB_NOTFOUND when there is no such record, or
+ * another error from the helpers called.
+ */
+static int
+__logc_get_int(logc, alsn, dbt, flags)
+ DB_LOGC *logc;
+ DB_LSN *alsn;
+ DBT *dbt;
+ u_int32_t flags;
+{
+ DB_CIPHER *db_cipher;
+ DB_LOG *dblp;
+ DB_LSN last_lsn, nlsn;
+ ENV *env;
+ HDR hdr;
+ LOG *lp;
+ RLOCK rlock;
+ logfile_validity status;
+ u_int32_t cnt, logfsz, orig_flags;
+ u_int8_t *rp;
+ int eof, is_hmac, need_cksum, ret;
+ size_t blen;
+#ifdef HAVE_LOG_CHECKSUM
+ u_int32_t i, logtype, version;
+ char chksumbuf[256];
+ u_int8_t ch;
+#endif
+
+ env = logc->env;
+ db_cipher = env->crypto_handle;
+ dblp = env->lg_handle;
+ lp = dblp->reginfo.primary;
+ eof = is_hmac = 0;
+ orig_flags = flags; /* flags may be altered later. */
+ blen = 0;
+ logfsz = lp->persist.log_size;
+
+ /*
+ * We don't acquire the log region lock until we need it, and we
+ * release it as soon as we're done.
+ */
+ rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE;
+
+#ifdef HAVE_LOG_CHECKSUM
+nextrec:
+#endif
+ nlsn = logc->lsn;
+ switch (flags) {
+ case DB_NEXT: /* Next log record. */
+ if (!IS_ZERO_LSN(nlsn)) {
+ /* Increment the cursor by the cursor record size. */
+ nlsn.offset += logc->len;
+ break;
+ }
+ flags = DB_FIRST;
+ /* FALLTHROUGH */
+ case DB_FIRST: /* First log record. */
+ /* Find the first log file. */
+ if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
+ goto err;
+
+ /*
+ * DB_LV_INCOMPLETE:
+ * Theoretically, the log file we want could be created
+ * but not yet written, the "first" log record must be
+ * in the log buffer.
+ * DB_LV_NORMAL:
+ * DB_LV_OLD_READABLE:
+ * We found a log file we can read.
+ * DB_LV_NONEXISTENT:
+ * No log files exist, the "first" log record must be in
+ * the log buffer.
+ * DB_LV_OLD_UNREADABLE:
+ * No readable log files exist, we're at the cross-over
+ * point between two versions. The "first" log record
+ * must be in the log buffer.
+ */
+ switch (status) {
+ case DB_LV_INCOMPLETE:
+ DB_ASSERT(env, lp->lsn.file == cnt);
+ /* FALLTHROUGH */
+ case DB_LV_NORMAL:
+ case DB_LV_OLD_READABLE:
+ nlsn.file = cnt;
+ break;
+ case DB_LV_NONEXISTENT:
+ nlsn.file = 1;
+ DB_ASSERT(env, lp->lsn.file == nlsn.file);
+ break;
+ case DB_LV_OLD_UNREADABLE:
+ nlsn.file = cnt + 1;
+ DB_ASSERT(env, lp->lsn.file == nlsn.file);
+ break;
+ }
+ nlsn.offset = 0;
+ break;
+ case DB_CURRENT: /* Current log record. */
+ break;
+ case DB_PREV: /* Previous log record. */
+ if (!IS_ZERO_LSN(nlsn)) {
+ /* If at start-of-file, move to the previous file. */
+ if (nlsn.offset == 0) {
+ if (nlsn.file == 1) {
+ ret = DB_NOTFOUND;
+ goto err;
+ }
+ if ((!lp->db_log_inmemory &&
+ (__log_valid(dblp, nlsn.file - 1, 0, NULL,
+ 0, &status, NULL) != 0 ||
+ (status != DB_LV_NORMAL &&
+ status != DB_LV_OLD_READABLE)))) {
+ ret = DB_NOTFOUND;
+ goto err;
+ }
+
+ --nlsn.file;
+ }
+ nlsn.offset = logc->prev;
+ break;
+ }
+ /* FALLTHROUGH */
+ case DB_LAST: /* Last log record. */
+ if (rlock == L_NONE) {
+ rlock = L_ACQUIRED;
+ LOG_SYSTEM_LOCK(env);
+ }
+ nlsn.file = lp->lsn.file;
+ nlsn.offset = lp->lsn.offset - lp->len;
+ break;
+ case DB_SET: /* Set log record. */
+ nlsn = *alsn;
+ break;
+ default:
+ ret = __db_unknown_path(env, "__logc_get_int");
+ goto err;
+ }
+
+ if (0) { /* Move to the next file. */
+next_file: ++nlsn.file;
+ nlsn.offset = 0;
+ }
+
+ /*
+ * The above switch statement should have set nlsn to the lsn of
+ * the requested record.
+ */
+
+ if (CRYPTO_ON(env)) {
+ hdr.size = HDR_CRYPTO_SZ;
+ is_hmac = 1;
+ } else {
+ hdr.size = HDR_NORMAL_SZ;
+ is_hmac = 0;
+ }
+
+ /*
+ * Check to see if the record is in the cursor's buffer -- if so,
+ * we'll need to checksum it.
+ */
+ if ((ret = __logc_incursor(logc, &nlsn, &hdr, &rp)) != 0)
+ goto err;
+ if (rp != NULL)
+ goto cksum;
+
+ /*
+ * Look to see if we're moving backward in the log with the last record
+ * coming from the disk -- it means the record can't be in the region's
+ * buffer. Else, check the region's buffer.
+ *
+ * If the record isn't in the region's buffer, then either logs are
+ * in-memory, and we're done, or we're going to have to read the
+ * record from disk. We want to make a point of not reading past the
+ * end of the logical log (after recovery, there may be data after the
+ * end of the logical log, not to mention the log file may have been
+ * pre-allocated). So, zero out last_lsn, and initialize it inside
+ * __logc_inregion -- if it's still zero when we check it in
+ * __logc_ondisk, that's OK, it just means the logical end of the log
+ * isn't an issue for this request.
+ */
+ ZERO_LSN(last_lsn);
+ if (!F_ISSET(logc, DB_LOG_DISK) ||
+ LOG_COMPARE(&nlsn, &logc->lsn) > 0) {
+ F_CLR(logc, DB_LOG_DISK);
+
+ if ((ret = __logc_inregion(logc,
+ &nlsn, &rlock, &last_lsn, &hdr, &rp, &need_cksum)) != 0)
+ goto err;
+ if (rp != NULL) {
+ /*
+ * If we read the entire record from the in-memory log
+ * buffer, we don't need to checksum it, nor do we need
+ * to worry about vtruncate issues.
+ */
+ if (need_cksum)
+ goto cksum;
+ goto from_memory;
+ }
+ if (lp->db_log_inmemory)
+ goto nohdr;
+ }
+
+ /*
+ * We have to read from an on-disk file to retrieve the record.
+ * If we ever can't retrieve the record at offset 0, we're done,
+ * return EOF/DB_NOTFOUND.
+ *
+ * Discard the region lock if we're still holding it, the on-disk
+ * reading routines don't need it.
+ */
+ if (rlock == L_ACQUIRED) {
+ rlock = L_NONE;
+ LOG_SYSTEM_UNLOCK(env);
+ }
+ if ((ret = __logc_ondisk(
+ logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0)
+ goto err;
+
+ /*
+ * If we got a 0-length record, that means we're in the midst of some
+ * bytes that got 0'd as the result of a vtruncate. In that case or at
+ * the end of a file, with DB_NEXT we're going to have to retry.
+ */
+ if (eof || hdr.len == 0) {
+nohdr: switch (flags) {
+ case DB_LAST:
+ case DB_PREV:
+ /*
+ * We should never get here. If we recover a log
+ * file with 0's at the end, we'll treat the 0'd
+ * headers as the end of log and ignore them. If
+ * we're reading backwards from another file, then
+ * the first record in that new file should have its
+ * prev field set correctly.
+ * First check that the file exists.
+ */
+ if (eof && logc->bp_lsn.file != nlsn.file)
+ __db_errx(env, DB_STR_A("2583",
+ "Log file %d not found, check log directory configuration", "%d"),
+ nlsn.file);
+ else
+ __db_errx(env, DB_STR("2576",
+ "Encountered zero length records while traversing backwards"));
+ ret = __env_panic(env, DB_RUNRECOVERY);
+ goto err;
+ case DB_FIRST:
+ case DB_NEXT:
+ /*
+ * Zero'd records always indicate the end of a file,
+ * but only go to the next file once.
+ */
+ if (nlsn.offset != 0)
+ goto next_file;
+ /* FALLTHROUGH */
+ case DB_SET:
+ default:
+ ret = DB_NOTFOUND;
+ goto err;
+ }
+ }
+
+ F_SET(logc, DB_LOG_DISK);
+
+cksum: /*
+ * Discard the region lock if we're still holding it. (The path to
+ * get here is we acquired the region lock because of the caller's
+ * flag argument, but we found the record in the in-memory or cursor
+ * buffers. Improbable, but it's easy to avoid.)
+ */
+ if (rlock == L_ACQUIRED) {
+ rlock = L_NONE;
+ LOG_SYSTEM_UNLOCK(env);
+ }
+#ifdef HAVE_LOG_CHECKSUM
+ /*
+ * Checksum: there are two types of errors -- a configuration error
+ * or a checksum mismatch. The former is always bad. The latter is
+ * OK if we're searching for the end of the log, and very, very bad
+ * if we're reading random log records.
+ */
+ if ((ret = __db_check_chksum(env, &hdr, db_cipher,
+ hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) {
+ /*
+ * This might be a log whose checksum does not include the hdr.
+ * Try again without the header, either for logs whose version
+ * is pre-DB_LOGCHKSUM, or for the persist record which contains
+ * the log version. Check for the zero offset first to avoid
+ * unwanted recursion in __logc_version().
+ *
+ * Set the cursor to the LSN we are trying to look at.
+ */
+ last_lsn = logc->lsn;
+ logc->lsn = nlsn;
+ if ((logc->lsn.offset == 0 ||
+ (__logc_version(logc, &version) == 0 &&
+ version < DB_LOGCHKSUM)) &&
+ __db_check_chksum(env, NULL, db_cipher, hdr.chksum,
+ rp + hdr.size, hdr.len - hdr.size, is_hmac) == 0) {
+ logc->lsn = last_lsn;
+ goto from_memory;
+ }
+
+ /*
+ * If we are iterating logs during log verification and basic
+ * header info is correct, we can skip the failed log record
+ * and goto next one.
+ */
+ if (F_ISSET(logc->env->lg_handle, DBLOG_VERIFYING) &&
+ (orig_flags == DB_FIRST || orig_flags == DB_LAST ||
+ orig_flags == DB_PREV || orig_flags == DB_NEXT) &&
+ hdr.size > 0 && hdr.len > hdr.size && hdr.len < logfsz &&
+ (((flags == DB_FIRST || flags == DB_NEXT) &&
+ hdr.prev == last_lsn.offset) ||
+ ((flags == DB_PREV || flags == DB_LAST) &&
+ last_lsn.offset - hdr.len == nlsn.offset))) {
+
+ flags = orig_flags;
+
+ logc->lsn = nlsn;
+ logc->len = hdr.len;
+ logc->prev = hdr.prev;
+
+ if (flags == DB_LAST)
+ flags = DB_PREV;
+ else if (flags == DB_FIRST)
+ flags = DB_NEXT;
+
+ memset(chksumbuf, 0, 256);
+ blen = 0;
+ for (i = 0; i < DB_MAC_KEY && blen < 256; i++) {
+ ch = hdr.chksum[i];
+ blen = strlen(chksumbuf);
+ snprintf(chksumbuf + blen, 255 - blen,
+ isprint(ch) ||
+ ch == 0x0a ? "%c" : "%#x ", ch);
+ }
+ /* Type field is always the first one in the record. */
+ memcpy(&logtype, rp + hdr.size, sizeof(logtype));
+ __db_errx(env, DB_STR_A("2577",
+ "DB_LOGC->get: log record LSN %lu/%lu: "
+ "checksum mismatch, hdr.chksum: %s, hdr.prev: %u, "
+ "hdr.len: %u, log type: %u. Skipping it and "
+ "continuing with the %s one",
+ "%lu %lu %s %u %u %u %s"),
+ (u_long)nlsn.file, (u_long)nlsn.offset, chksumbuf,
+ hdr.prev, hdr.len, logtype, flags == DB_NEXT ?
+ DB_STR_P("next") : DB_STR_P("previous"));
+ goto nextrec;
+ }
+
+ if (F_ISSET(logc, DB_LOG_SILENT_ERR)) {
+ if (ret == -1)
+ ret = EIO;
+ } else if (ret == -1) {
+ __db_errx(env, DB_STR_A("2578",
+ "DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch",
+ "%lu %lu"), (u_long)nlsn.file, (u_long)nlsn.offset);
+ __db_errx(env, DB_STR("2579",
+ "DB_LOGC->get: catastrophic recovery may be required"));
+ ret = __env_panic(env, DB_RUNRECOVERY);
+ }
+ logc->lsn = last_lsn;
+ goto err;
+ }
+#endif
+
+from_memory:
+ /*
+ * Discard the region lock if we're still holding it. (The path to
+ * get here is we acquired the region lock because of the caller's
+ * flag argument, but we found the record in the in-memory or cursor
+ * buffers. Improbable, but it's easy to avoid.)
+ */
+ if (rlock == L_ACQUIRED) {
+ rlock = L_NONE;
+ LOG_SYSTEM_UNLOCK(env);
+ }
+
+ /* Copy the record into the user's DBT. */
+ if ((ret = __db_retcopy(env, dbt, rp + hdr.size,
+ (u_int32_t)(hdr.len - hdr.size),
+ &logc->dbt.data, &logc->dbt.ulen)) != 0)
+ goto err;
+
+ if (CRYPTO_ON(env)) {
+ if ((ret = db_cipher->decrypt(env, db_cipher->data,
+ hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) {
+ ret = EAGAIN;
+ goto err;
+ }
+ /*
+ * Return the original log record size to the user,
+ * even though we've allocated more than that, possibly.
+ * The log record is decrypted in the user dbt, not in
+ * the buffer, so we must do this here after decryption,
+ * not adjust the len passed to the __db_retcopy call.
+ */
+ dbt->size = hdr.orig_size;
+ }
+
+ /* Update the cursor and the returned LSN. */
+ *alsn = nlsn;
+ logc->lsn = nlsn;
+ logc->len = hdr.len;
+ logc->prev = hdr.prev;
+
+err: if (rlock == L_ACQUIRED)
+ LOG_SYSTEM_UNLOCK(env);
+
+ return (ret);
+}
+
+/*
+ * __logc_incursor --
+ *	Check to see if the requested record is in the cursor's buffer.
+ *
+ *	*pp is set to the record's address inside logc->bp when the whole
+ *	record is buffered, and to NULL otherwise.  Whenever the record's
+ *	header lies inside the buffer it is copied into *hdr (byte-swapped
+ *	if the log is of the other endianness), even if the record itself
+ *	is only partly buffered.
+ *
+ *	Returns 0 in both the hit and the miss case, and DB_NOTFOUND when
+ *	the buffered header fails __logc_hdrchk.
+ */
+static int
+__logc_incursor(logc, lsn, hdr, pp)
+	DB_LOGC *logc;
+	DB_LSN *lsn;
+	HDR *hdr;
+	u_int8_t **pp;
+{
+	ENV *env;
+	u_int8_t *rec;
+	int at_eof;
+
+	env = logc->env;
+	*pp = NULL;
+
+	/*
+	 * The record can only be (partly) buffered if it is in the same
+	 * file as the buffer, does not start before the buffer does, and
+	 * its header ends before the buffer's valid bytes run out.
+	 */
+	if (logc->bp_lsn.file != lsn->file ||
+	    logc->bp_lsn.offset > lsn->offset ||
+	    logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size)
+		return (0);
+
+	/*
+	 * Pull the header out of the buffer and validate it.  A header
+	 * that fails the check can only mean the LSN is bogus, so fail
+	 * hard rather than falling back to another source.
+	 */
+	rec = logc->bp + (lsn->offset - logc->bp_lsn.offset);
+	memcpy(hdr, rec, hdr->size);
+	if (LOG_SWAPPED(env))
+		__log_hdrswap(hdr, CRYPTO_ON(env));
+	if (__logc_hdrchk(logc, lsn, hdr, &at_eof))
+		return (DB_NOTFOUND);
+
+	/*
+	 * Only a record held in full counts as a hit; having just part of
+	 * it saves nothing, the caller fetches it again from elsewhere.
+	 */
+	if (!at_eof &&
+	    logc->bp_lsn.offset + logc->bp_rlen >= lsn->offset + hdr->len)
+		*pp = rec;
+
+	return (0);
+}
+
+/*
+ * __logc_inregion --
+ * Check to see if the requested record is in the region's buffer.
+ *
+ * Acquires the log region lock if *rlockp is L_NONE. The lock is
+ * released here only on the partial-record path (before the disk
+ * read); on every other path it is left for the caller to release.
+ *
+ * Outputs:
+ * *last_lsn the region's current LSN, with the offset clamped to
+ * lp->w_off for on-disk logs, for use by the disk reader.
+ * *hdr the record's header, when one was read.
+ * *pp NULL if the record has to be sought elsewhere, else a
+ * pointer into logc->bp where the record was assembled.
+ * *need_cksump set to 1 only when part of the record came from disk.
+ *
+ * Returns 0 (check *pp for hit or miss), DB_NOTFOUND when the LSN is
+ * at or past the end of the log, fails the header check or does not
+ * fit in the buffer, or an error from allocation or I/O.
+ */
+static int
+__logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
+ DB_LOGC *logc;
+ DB_LSN *lsn, *last_lsn;
+ RLOCK *rlockp;
+ HDR *hdr;
+ u_int8_t **pp;
+ int *need_cksump;
+{
+ DB_LOG *dblp;
+ ENV *env;
+ LOG *lp;
+ size_t b_region, len, nr;
+ u_int32_t b_disk;
+ int eof, ret;
+ u_int8_t *p;
+
+ env = logc->env;
+ dblp = env->lg_handle;
+ lp = env->lg_handle->reginfo.primary;
+
+ ret = 0;
+ b_region = 0;
+ *pp = NULL;
+ *need_cksump = 0;
+
+ /* If we haven't yet acquired the log region lock, do so. */
+ if (*rlockp == L_NONE) {
+ *rlockp = L_ACQUIRED;
+ LOG_SYSTEM_LOCK(env);
+ }
+
+ /*
+ * The routines to read from disk must avoid reading past the logical
+ * end of the log, so pass that information back to it.
+ *
+ * Since they're reading directly from the disk, they must also avoid
+ * reading past the offset we've written out. If the log was
+ * truncated, it's possible that there are zeroes or garbage on
+ * disk after this offset, and the logical end of the log can
+ * come later than this point if the log buffer isn't empty.
+ */
+ *last_lsn = lp->lsn;
+ if (!lp->db_log_inmemory && last_lsn->offset > lp->w_off)
+ last_lsn->offset = lp->w_off;
+
+ /*
+ * Test to see if the requested LSN could be part of the region's
+ * buffer.
+ *
+ * During recovery, we read the log files getting the information to
+ * initialize the region. In that case, the region's lsn field will
+ * not yet have been filled in, use only the disk.
+ *
+ * The record must not start at a byte offset after the region buffer's
+ * end, since that means the request is for a record after the end of
+ * the log. Do this test even if the region's buffer is empty -- after
+ * recovery, the log files may continue past the declared end-of-log,
+ * and the disk reading routine will incorrectly attempt to read the
+ * remainder of the log.
+ *
+ * Otherwise, test to see if the region's buffer actually has what we
+ * want:
+ *
+ * The buffer must have some useful content.
+ * The record must be in the same file as the region's buffer and must
+ * start at a byte offset equal to or greater than the region's buffer.
+ */
+ if (IS_ZERO_LSN(lp->lsn))
+ return (0);
+ if (LOG_COMPARE(lsn, &lp->lsn) >= 0)
+ return (DB_NOTFOUND);
+ else if (lp->db_log_inmemory) {
+ if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0)
+ return (ret);
+ } else if (lp->b_off == 0 || LOG_COMPARE(lsn, &lp->f_lsn) < 0)
+ return (0);
+
+ /*
+ * The current contents of the cursor's buffer will be useless for a
+ * future call, we're about to overwrite it -- trash it rather than
+ * try and make it look correct.
+ */
+ logc->bp_rlen = 0;
+
+ /*
+ * If the requested LSN is greater than the region buffer's first
+ * byte, we know the entire record is in the buffer on a good LSN.
+ *
+ * If we're given a bad LSN, the "entire" record might not be in
+ * our buffer in order to fail at the chksum. __logc_hdrchk made
+ * sure our dest buffer fits, via bp_maxrec, but we also need to
+ * make sure we don't run off the end of this buffer, the src.
+ *
+ * There is one case where the header check can fail: on a scan through
+ * in-memory logs, when we reach the end of a file we can read an empty
+ * header. In that case, it's safe to return zero, here: it will be
+ * caught in our caller. Otherwise, the LSN is bogus. Fail hard.
+ */
+ if (lp->db_log_inmemory || LOG_COMPARE(lsn, &lp->f_lsn) > 0) {
+ if (!lp->db_log_inmemory)
+ b_region = lsn->offset - lp->w_off;
+ __log_inmem_copyout(dblp, b_region, hdr, hdr->size);
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(hdr, CRYPTO_ON(env));
+ if (__logc_hdrchk(logc, lsn, hdr, &eof) != 0)
+ return (DB_NOTFOUND);
+ if (eof)
+ return (0);
+ if (lp->db_log_inmemory) {
+ if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len)
+ return (DB_NOTFOUND);
+ } else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
+ return (DB_NOTFOUND);
+ if (logc->bp_size <= hdr->len) {
+ len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
+ if ((ret =
+ __os_realloc(logc->env, len, &logc->bp)) != 0)
+ return (ret);
+ logc->bp_size = (u_int32_t)len;
+ }
+ __log_inmem_copyout(dblp, b_region, logc->bp, hdr->len);
+ *pp = logc->bp;
+ return (0);
+ }
+
+ DB_ASSERT(env, !lp->db_log_inmemory);
+
+ /*
+ * There's a partial record, that is, the requested record starts
+ * in a log file and finishes in the region buffer. We have to
+ * find out how many bytes of the record are in the region buffer
+ * so we can copy them out into the cursor buffer. First, check
+ * to see if the requested record is the only record in the region
+ * buffer, in which case we should copy the entire region buffer.
+ *
+ * Else, walk back through the region's buffer to find the first LSN
+ * after the record that crosses the buffer boundary -- we can detect
+ * that LSN, because its "prev" field will reference the record we
+ * want. The bytes we need to copy from the region buffer are the
+ * bytes up to the record we find. The bytes we'll need to allocate
+ * to hold the log record are the bytes between the two offsets.
+ */
+ b_disk = lp->w_off - lsn->offset;
+ if (lp->b_off <= lp->len)
+ b_region = (u_int32_t)lp->b_off;
+ else
+ for (p = dblp->bufp + (lp->b_off - lp->len);;) {
+ memcpy(hdr, p, hdr->size);
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(hdr, CRYPTO_ON(env));
+ if (hdr->prev == lsn->offset) {
+ b_region = (u_int32_t)(p - dblp->bufp);
+ break;
+ }
+ p = dblp->bufp + (hdr->prev - lp->w_off);
+ }
+
+ /*
+ * If we don't have enough room for the record, we have to allocate
+ * space. We have to do it while holding the region lock, which is
+ * truly annoying, but there's no way around it. This call is why
+ * we allocate cursor buffer space when allocating the cursor instead
+ * of waiting.
+ */
+ if (logc->bp_size <= b_region + b_disk) {
+ len = (size_t)DB_ALIGN((uintmax_t)(b_region + b_disk) * 2, 128);
+ if ((ret = __os_realloc(logc->env, len, &logc->bp)) != 0)
+ return (ret);
+ logc->bp_size = (u_int32_t)len;
+ }
+
+ /* Copy the region's bytes to the end of the cursor's buffer. */
+ p = (logc->bp + logc->bp_size) - b_region;
+ memcpy(p, dblp->bufp, b_region);
+
+ /* Release the region lock. */
+ if (*rlockp == L_ACQUIRED) {
+ *rlockp = L_NONE;
+ LOG_SYSTEM_UNLOCK(env);
+ }
+
+ /*
+ * Read the rest of the information from disk. Neither short reads
+ * or EOF are acceptable, the bytes we want had better be there.
+ */
+ if (b_disk != 0) {
+ p -= b_disk;
+ nr = b_disk;
+ if ((ret = __logc_io(
+ logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0)
+ return (ret);
+ if (nr < b_disk)
+ return (__logc_shortread(logc, lsn, 0));
+
+ /* We read bytes from the disk, we'll need to checksum them. */
+ *need_cksump = 1;
+ }
+
+ /* Copy the header information into the caller's structure. */
+ memcpy(hdr, p, hdr->size);
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(hdr, CRYPTO_ON(env));
+
+ *pp = p;
+ return (0);
+}
+
+/*
+ * __log_hdrswap --
+ *	Byte-swap a log record header that was written on a machine of the
+ *	opposite endianness.  The prev and len fields are always swapped;
+ *	the checksum is swapped only when is_hmac is zero.
+ *
+ * PUBLIC: void __log_hdrswap __P((HDR *, int));
+ */
+void
+__log_hdrswap(hdr, is_hmac)
+	HDR *hdr;
+	int is_hmac;
+{
+	M_32_SWAP(hdr->prev);
+	M_32_SWAP(hdr->len);
+
+	/* With is_hmac set the checksum field is left exactly as read. */
+	if (is_hmac)
+		return;
+	P_32_SWAP(hdr->chksum);
+}
+
+/*
+ * __log_persistswap --
+ *	Byte-swap, in place, the 32-bit fields of a log file's persistent
+ *	header (magic, version, log_size, notused) when it was written on
+ *	a machine of the opposite endianness.
+ *
+ * PUBLIC: void __log_persistswap __P((LOGP *));
+ */
+void
+__log_persistswap(persist)
+	LOGP *persist;
+{
+	/* Identification of the file ... */
+	M_32_SWAP(persist->magic);
+	M_32_SWAP(persist->version);
+
+	/* ... followed by the remaining fields. */
+	M_32_SWAP(persist->log_size);
+	M_32_SWAP(persist->notused);
+}
+
+/*
+ * __logc_ondisk --
+ * Read a record off disk.
+ *
+ * Reads hdr->size bytes at lsn into *hdr, validates the header with
+ * __logc_hdrchk, grows the cursor buffer when it cannot hold hdr->len
+ * bytes, then reads up to a buffer's worth of the log file into logc->bp
+ * and sets *pp to the start of the requested record inside that buffer.
+ *
+ * Returns 0 on success and also when EOF is detected (in which case *eofp
+ * is set to 1 and *pp is not set). Otherwise returns the error from the
+ * failing call, or the value of __logc_shortread when fewer bytes were
+ * read than the record needs.
+ */
+static int
+__logc_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp)
+ DB_LOGC *logc;
+ DB_LSN *lsn, *last_lsn;
+ u_int32_t flags;
+ int *eofp;
+ HDR *hdr;
+ u_int8_t **pp;
+{
+ ENV *env;
+ size_t len, nr;
+ u_int32_t offset;
+ int ret;
+
+ env = logc->env;
+ *eofp = 0;
+
+ nr = hdr->size;
+ if ((ret =
+ __logc_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0)
+ return (ret);
+ if (*eofp)
+ return (0);
+
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(hdr, CRYPTO_ON(env));
+
+ /*
+ * If the read was successful, but we can't read a full header, assume
+ * we've hit EOF. We can't check that the header has been partially
+ * zeroed out, but it's unlikely that this is caused by a write failure
+ * since the header is written as a single write call and it's less
+ * than a sector.
+ */
+ if (nr < hdr->size) {
+ *eofp = 1;
+ return (0);
+ }
+
+ /* Check the HDR. */
+ if ((ret = __logc_hdrchk(logc, lsn, hdr, eofp)) != 0)
+ return (ret);
+ if (*eofp)
+ return (0);
+
+ /*
+ * Regardless of how we return, the previous contents of the cursor's
+ * buffer are useless -- trash it.
+ */
+ logc->bp_rlen = 0;
+
+ /*
+ * Otherwise, we now (finally!) know how big the record is. (Maybe
+ * we should have just stuck the length of the record into the LSN!?)
+ * Make sure we have enough space.
+ *
+ * The new size is twice the record length, aligned with DB_ALIGN to
+ * 128.
+ */
+ if (logc->bp_size <= hdr->len) {
+ len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
+ if ((ret = __os_realloc(env, len, &logc->bp)) != 0)
+ return (ret);
+ logc->bp_size = (u_int32_t)len;
+ }
+
+ /*
+ * If we're moving forward in the log file, read this record in at the
+ * beginning of the buffer. Otherwise, read this record in at the end
+ * of the buffer, making sure we don't try and read before the start
+ * of the file. (We prefer positioning at the end because transaction
+ * aborts use DB_SET to move backward through the log and we might get
+ * lucky.)
+ *
+ * Read a buffer's worth, without reading past the logical EOF. The
+ * last_lsn may be a zero LSN, but that's OK, the test works anyway.
+ */
+ if (flags == DB_FIRST || flags == DB_NEXT)
+ offset = lsn->offset;
+ else if (lsn->offset + hdr->len < logc->bp_size)
+ offset = 0;
+ else
+ offset = (lsn->offset + hdr->len) - logc->bp_size;
+
+ nr = logc->bp_size;
+ if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset)
+ nr = last_lsn->offset - offset;
+
+ if ((ret =
+ __logc_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0)
+ return (ret);
+
+ /*
+ * We should have at least gotten the bytes up-to-and-including the
+ * record we're reading.
+ */
+ if (nr < (lsn->offset + hdr->len) - offset)
+ return (__logc_shortread(logc, lsn, 1));
+
+ /*
+ * Set up the return information.
+ *
+ * !!!
+ * No need to set the bp_lsn.file field, __logc_io set it for us.
+ */
+ logc->bp_rlen = (u_int32_t)nr;
+ logc->bp_lsn.offset = offset;
+
+ *pp = logc->bp + (lsn->offset - offset);
+
+ return (0);
+}
+
+/*
+ * __logc_hdrchk --
+ *
+ * Validate a HDR before it is used to size an allocation or to locate
+ * another record.
+ *
+ * If the log files were pre-allocated, a zero-filled HDR structure is the
+ * logical file end.  However, we can see buffers filled with 0's during
+ * recovery, too (because multiple log buffers were written asynchronously,
+ * and one made it to disk before a different one that logically precedes
+ * it in the log file).
+ *
+ * Impossibly large records are rejected here rather than left to fail in
+ * malloc, because some applications run allocators that treat every
+ * allocation failure as fatal.
+ *
+ * A bad header is not necessarily a disaster: the application may hand us
+ * any LSN it likes, including one that points into the middle of a record,
+ * and there is no way to tell.
+ *
+ * Returns 0 for a usable header or for EOF (*eofp set to 1 when eofp is
+ * non-NULL), the error from __logc_set_maxrec, or EIO for an invalid
+ * header.
+ */
+static int
+__logc_hdrchk(logc, lsn, hdr, eofp)
+	DB_LOGC *logc;
+	DB_LSN *lsn;
+	HDR *hdr;
+	int *eofp;
+{
+	ENV *env;
+	int invalid, ret;
+
+	env = logc->env;
+
+	/*
+	 * EOF comes first: a header whose prev, first checksum byte and len
+	 * are all zero is the "virtual" end of the file.
+	 */
+	if (eofp != NULL) {
+		*eofp = (hdr->prev == 0 &&
+		    hdr->chksum[0] == 0 && hdr->len == 0) ? 1 : 0;
+		if (*eofp)
+			return (0);
+	}
+
+	/* A record must be longer than its own header. */
+	invalid = hdr->len <= hdr->size;
+
+	/*
+	 * A zero max-record value means we aren't reading from a log file
+	 * and there is nothing to compare against.  Otherwise, if the record
+	 * looks too big, refresh the bound once (we may be reading the last
+	 * file while it grows) before giving up.
+	 */
+	if (!invalid && logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) {
+		if ((ret = __logc_set_maxrec(logc, NULL)) != 0) {
+			__db_err(env, ret, "DB_LOGC->get");
+			return (ret);
+		}
+		invalid = logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec;
+	}
+	if (!invalid)
+		return (0);
+
+	if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
+		__db_errx(env, DB_STR_A("2580",
+		    "DB_LOGC->get: LSN %lu/%lu: invalid log record header",
+		    "%lu %lu"), (u_long)lsn->file, (u_long)lsn->offset);
+	return (EIO);
+}
+
+/*
+ * __logc_io --
+ *	Read *nrp bytes at the given offset of log file fnum into p, opening
+ *	(or re-opening) the cursor's file handle as needed.  On return *nrp
+ *	holds the byte count reported by __os_io.
+ *
+ *	When the file cannot be opened and eofp is non-NULL, *eofp is set to
+ *	1 and 0 is returned; with a NULL eofp the open error is returned.
+ */
+static int
+__logc_io(logc, fnum, offset, p, nrp, eofp)
+	DB_LOGC *logc;
+	u_int32_t fnum, offset;
+	void *p;
+	size_t *nrp;
+	int *eofp;
+{
+	DB_LOG *dblp;
+	ENV *env;
+	LOG *lp;
+	char *np;
+	int ret;
+
+	env = logc->env;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	/* A handle open on some other log file is of no use; drop it. */
+	if (logc->fhp != NULL && logc->bp_lsn.file != fnum) {
+		ret = __os_closehandle(env, logc->fhp);
+		logc->bp_lsn.file = 0;
+		logc->fhp = NULL;
+		if (ret != 0)
+			return (ret);
+	}
+
+	if (logc->fhp == NULL) {
+		ret = __log_name(dblp,
+		    fnum, &np, &logc->fhp, DB_OSO_RDONLY | DB_OSO_SEQ);
+		if (ret != 0) {
+			/*
+			 * A caller that can accept EOF gets EOF; anyone else
+			 * gets the error, reported unless silenced.
+			 */
+			if (eofp != NULL) {
+				*eofp = 1;
+				ret = 0;
+			} else if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
+				__db_err(env, ret, "DB_LOGC->get: %s",
+				    np == NULL ? "__log_name failed" : np);
+			__os_free(env, np);
+			return (ret);
+		}
+
+		/* Bound the record size for the file we just opened. */
+		ret = __logc_set_maxrec(logc, np);
+		if (ret != 0)
+			__db_err(env, ret, "DB_LOGC->get: %s", np);
+		__os_free(env, np);
+		if (ret != 0)
+			return (ret);
+
+		logc->bp_lsn.file = fnum;
+	}
+
+	STAT_INC(env, log, read, lp->stat.st_rcount, fnum);
+
+	/* Position at the requested offset and read. */
+	ret = __os_io(env, DB_IO_READ,
+	    logc->fhp, 0, 0, offset, (u_int32_t)*nrp, p, nrp);
+	if (ret == 0)
+		return (0);
+
+	if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
+		__db_err(env, ret, DB_STR_A("2581",
+		    "DB_LOGC->get: LSN: %lu/%lu: read", "%lu %lu"),
+		    (u_long)fnum, (u_long)offset);
+	return (ret);
+}
+
+/*
+ * __logc_shortread --
+ *	Common handling for a short read: emit one consistent message and
+ *	return EIO.  The message is suppressed only when check_silent is
+ *	non-zero and the cursor has DB_LOG_SILENT_ERR set.
+ */
+static int
+__logc_shortread(logc, lsn, check_silent)
+	DB_LOGC *logc;
+	DB_LSN *lsn;
+	int check_silent;
+{
+	int quiet;
+
+	quiet = check_silent && F_ISSET(logc, DB_LOG_SILENT_ERR);
+	if (!quiet)
+		__db_errx(logc->env, DB_STR_A("2582",
+		    "DB_LOGC->get: LSN: %lu/%lu: short read", "%lu %lu"),
+		    (u_long)lsn->file, (u_long)lsn->offset);
+	return (EIO);
+}
+
+/*
+ * __logc_set_maxrec --
+ *	Raise the cursor's upper bound on log record size (bp_maxrec) to
+ *	the larger of the open log file's size and the log buffer size.
+ *	The bound is only ever increased here, never lowered.
+ */
+static int
+__logc_set_maxrec(logc, np)
+	DB_LOGC *logc;
+	char *np;
+{
+	DB_LOG *dblp;
+	ENV *env;
+	LOG *lp;
+	u_int32_t mbytes, bytes;
+	int ret;
+
+	env = logc->env;
+	dblp = env->lg_handle;
+
+	/*
+	 * Applications with error-checking mallocs often treat a huge
+	 * allocation request as a hard failure, so a corrupted record with a
+	 * bizarre size has to be caught before we try to allocate space for
+	 * it.  Reading the persistent data at the start of the file would
+	 * tell us the limit, but that may require decryption, checksumming
+	 * and so on.  Stat the file instead.
+	 */
+	if (logc->fhp != NULL) {
+		ret = __os_ioinfo(env, np, logc->fhp, &mbytes, &bytes, NULL);
+		if (ret != 0)
+			return (ret);
+		if ((mbytes * MEGABYTE + bytes) > logc->bp_maxrec)
+			logc->bp_maxrec = mbytes * MEGABYTE + bytes;
+	}
+
+	/*
+	 * The stat can be stale when we are reading the file currently being
+	 * written: a cursor opened when the file held a few hundred bytes
+	 * may later move forward over records written since.  Taking the
+	 * maximum of the file size and the log buffer size should cover any
+	 * record currently in the file.
+	 *
+	 * The log buffer size is set when the environment is opened and
+	 * never changed, so no lock is needed to read it.
+	 */
+	lp = dblp->reginfo.primary;
+	if (lp->buffer_size > logc->bp_maxrec)
+		logc->bp_maxrec = lp->buffer_size;
+
+	return (0);
+}
+
+/*
+ * __log_read_record_pp --
+ *	Pre/post processing around __log_read_record.  Allocates the
+ *	argument buffer (size plus room for a DB_TXN) with __os_umalloc and
+ *	hands it back through *argpp; on any failure the buffer is released
+ *	and *argpp is left NULL.
+ *
+ * PUBLIC: int __log_read_record_pp __P((DB_ENV *, DB **, void *, void *,
+ * PUBLIC: DB_LOG_RECSPEC *, u_int32_t, void **));
+ */
+int
+__log_read_record_pp(dbenv, dbpp, td, recbuf, spec, size, argpp)
+	DB_ENV *dbenv;
+	DB **dbpp;
+	void *td;
+	void *recbuf;
+	DB_LOG_RECSPEC *spec;
+	u_int32_t size;
+	void **argpp;
+{
+	DB_THREAD_INFO *ip;
+	int ret;
+
+	ENV_REQUIRES_CONFIG(dbenv->env,
+	    dbenv->env->lg_handle, "DB_ENV->log_read_record", DB_INIT_LOG);
+
+	*argpp = NULL;
+	ENV_ENTER(dbenv->env, ip);
+
+	ret = __os_umalloc(dbenv->env, size + sizeof(DB_TXN), argpp);
+	if (ret == 0) {
+		REPLICATION_WRAP(dbenv->env, (__log_read_record(dbenv->env,
+		    dbpp, td, recbuf, spec, size, argpp)), 0, ret);
+
+		/* Don't hand a half-filled structure back to the caller. */
+		if (ret != 0) {
+			__os_ufree(dbenv->env, *argpp);
+			*argpp = NULL;
+		}
+	}
+
+	ENV_LEAVE(dbenv->env, ip);
+	return (ret);
+}
+
+/*
+ * __log_read_record --
+ *	Unmarshal a log record from recbuf into an argument structure,
+ *	driven by the field list in spec (terminated by LOGREC_Done).
+ *
+ *	dbpp	If non-NULL, a LOGREC_DB field sets *dbpp through
+ *		__dbreg_id_to_db; may be NULL.
+ *	td	Stored in the td field of the DB_TXN built after the args.
+ *	recbuf	The raw record.  DBT-style fields are not copied: their
+ *		data pointers refer into recbuf.
+ *	size	Size of the argument structure; a DB_TXN is laid out
+ *		immediately after it in the same allocation.
+ *	argpp	In: an existing buffer of at least size + sizeof(DB_TXN)
+ *		bytes, or NULL to have one allocated here with __os_malloc.
+ *		Out: the buffer, whenever the end of the spec is reached.
+ *
+ *	Returns 0, the result of __dbreg_id_to_db (the record is still
+ *	fully parsed and *argpp set), or the error from __os_malloc or
+ *	__db_pageswap.  When __db_pageswap fails, a buffer allocated here is
+ *	freed before returning; a caller-supplied buffer is left to the
+ *	caller.
+ *
+ * PUBLIC: int __log_read_record __P((ENV *, DB **, void *, void *,
+ * PUBLIC: DB_LOG_RECSPEC *, u_int32_t, void **));
+ */
+int
+__log_read_record(env, dbpp, td, recbuf, spec, size, argpp)
+	ENV *env;
+	DB **dbpp;
+	void *td;
+	void *recbuf;
+	DB_LOG_RECSPEC *spec;
+	u_int32_t size;
+	void **argpp;
+{
+	DB_LOG_RECSPEC *sp, *np;
+	DB_TXN *txnp;
+	LOG *lp;
+	PAGE *hdrstart;
+	u_int32_t hdrsize, op, uinttmp;
+	u_int8_t *ap, *bp;
+	int allocated, has_data, ret, downrev;
+
+	COMPQUIET(has_data, 0);
+	COMPQUIET(hdrsize, 0);
+	COMPQUIET(hdrstart, NULL);
+	COMPQUIET(op, 0);
+	ap = *argpp;
+	allocated = 0;
+	/*
+	 * Allocate space for the arg structure and a transaction
+	 * structure which will immediately follow it.  Remember whether the
+	 * buffer is ours so that it can be released on an error return.
+	 */
+	if (ap == NULL) {
+		if ((ret =
+		    __os_malloc(env, size + sizeof(DB_TXN), &ap)) != 0)
+			return (ret);
+		allocated = 1;
+	}
+	txnp = (DB_TXN *)(ap + size);
+	memset(txnp, 0, sizeof(DB_TXN));
+	txnp->td = td;
+	lp = env->lg_handle->reginfo.primary;
+	downrev = lp->persist.version < DB_LOGVERSION_50;
+
+	bp = recbuf;
+
+	/*
+	 * The first three fields are always the same in every arg
+	 * struct so we know their offsets.
+	 */
+	/* type */
+	LOGCOPY_32(env, ap + SSZ(LOG_REC_HEADER, type), bp);
+	bp += sizeof(u_int32_t);
+
+	/* txnp */
+	LOGCOPY_32(env, &txnp->txnid, bp);
+	*(DB_TXN **)(ap + SSZ(LOG_REC_HEADER, txnp)) = txnp;
+	bp += sizeof(txnp->txnid);
+
+	/* Previous LSN */
+	LOGCOPY_TOLSN(env,
+	    (DB_LSN *)(ap + SSZ(LOG_REC_HEADER, prev_lsn)), bp);
+	bp += sizeof(DB_LSN);
+
+	ret = 0;
+	for (sp = spec; sp->type != LOGREC_Done; sp++) {
+		switch (sp->type) {
+		case LOGREC_DB:
+			LOGCOPY_32(env, &uinttmp, bp);
+			*(u_int32_t*)(ap + sp->offset) = uinttmp;
+			bp += sizeof(uinttmp);
+			/*
+			 * A lookup failure is remembered in ret but does not
+			 * stop the parse.
+			 */
+			if (dbpp != NULL) {
+				*dbpp = NULL;
+				ret = __dbreg_id_to_db(env,
+				    txnp, dbpp, (int32_t)uinttmp, 1);
+			}
+			break;
+
+		case LOGREC_ARG:
+		case LOGREC_TIME:
+		case LOGREC_DBOP:
+			LOGCOPY_32(env, ap + sp->offset, bp);
+			bp += sizeof(uinttmp);
+			break;
+		case LOGREC_OP:
+			LOGCOPY_32(env, &op, bp);
+			*(u_int32_t *)(ap + sp->offset) = op;
+			bp += sizeof(uinttmp);
+			break;
+		case LOGREC_DBT:
+		case LOGREC_PGLIST:
+		case LOGREC_LOCKS:
+		case LOGREC_HDR:
+		case LOGREC_DATA:
+		case LOGREC_PGDBT:
+		case LOGREC_PGDDBT:
+			/* A length-prefixed blob: the DBT points into recbuf. */
+			memset(ap + sp->offset, 0, sizeof(DBT));
+			LOGCOPY_32(env, &uinttmp, bp);
+			*(u_int32_t*)
+			    (ap + sp->offset + SSZ(DBT, size)) = uinttmp;
+			bp += sizeof(u_int32_t);
+			*(void **)(ap + sp->offset + SSZ(DBT, data)) = bp;
+
+			/* Process fields that need to be byte swapped. */
+			switch (sp->type) {
+			case LOGREC_DBT:
+			case LOGREC_PGLIST:
+			case LOGREC_LOCKS:
+				break;
+			case LOGREC_HDR:
+				if (uinttmp == 0)
+					break;
+				/*
+				 * If a DATA field follows, the swap is
+				 * deferred until that field is reached.
+				 */
+				has_data = 0;
+				for (np = sp + 1;
+				    np->type != LOGREC_Done; np++)
+					if (np->type == LOGREC_DATA) {
+						has_data = 1;
+						break;
+					}
+				hdrstart = (PAGE *)bp;
+				hdrsize = uinttmp;
+				if (has_data == 1)
+					break;
+				/* FALLTHROUGH */
+			case LOGREC_DATA:
+				if (downrev ? LOG_SWAPPED(env) :
+				    (dbpp != NULL && *dbpp != NULL &&
+				    F_ISSET(*dbpp, DB_AM_SWAP)))
+					__db_recordswap(op, hdrsize,
+					    hdrstart, has_data ?
+					    ap + sp->offset : NULL, 1);
+				break;
+			case LOGREC_PGDBT:
+				/* Same deferral, for a following PGDDBT. */
+				has_data = 0;
+				for (np = sp + 1;
+				    np->type != LOGREC_Done; np++)
+					if (np->type == LOGREC_PGDDBT) {
+						has_data = 1;
+						break;
+					}
+
+				hdrstart = (PAGE *)bp;
+				hdrsize = uinttmp;
+				if (has_data == 1)
+					break;
+				/* FALLTHROUGH */
+			case LOGREC_PGDDBT:
+				if (dbpp != NULL && *dbpp != NULL &&
+				    (downrev ? LOG_SWAPPED(env) :
+				    F_ISSET(*dbpp, DB_AM_SWAP)) &&
+				    (ret = __db_pageswap(env, *dbpp, hdrstart,
+				    hdrsize, has_data == 0 ? NULL :
+				    (DBT *)(ap + sp->offset), 1)) != 0) {
+					/*
+					 * *argpp has not been set, so a
+					 * buffer allocated above would be
+					 * unreachable: release it here.
+					 */
+					if (allocated)
+						__os_free(env, ap);
+					return (ret);
+				}
+				break;
+			default:
+				DB_ASSERT(env, sp->type != sp->type);
+			}
+
+			bp += uinttmp;
+			break;
+
+		case LOGREC_POINTER:
+			LOGCOPY_TOLSN(env, (DB_LSN *)(ap + sp->offset), bp);
+			bp += sizeof(DB_LSN);
+			break;
+
+		default:
+			DB_ASSERT(env, sp->type != sp->type);
+		}
+	}
+
+	*argpp = ap;
+	return (ret);
+}
diff --git a/src/log/log_method.c b/src/log/log_method.c
new file mode 100644
index 00000000..d5aec116
--- /dev/null
+++ b/src/log/log_method.c
@@ -0,0 +1,533 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/log.h"
+
+/*
+ * __log_env_create --
+ *	Set the logging fields of a new DB_ENV (lg_bsize, lg_regionmax) to
+ *	zero.  Always returns 0.
+ *
+ * PUBLIC: int __log_env_create __P((DB_ENV *));
+ */
+int
+__log_env_create(dbenv)
+	DB_ENV *dbenv;
+{
+	/*
+	 * !!!
+	 * Our caller has not yet had the opportunity to reset the panic
+	 * state or turn off mutex locking, and so we can neither check
+	 * the panic state nor acquire a mutex in the DB_ENV create path.
+	 */
+	dbenv->lg_regionmax = 0;
+	dbenv->lg_bsize = 0;
+
+	return (0);
+}
+
+/*
+ * __log_env_destroy --
+ *	Logging counterpart of DB_ENV destruction.  There is currently
+ *	nothing to release; the argument is only referenced to keep the
+ *	compiler quiet.
+ *
+ * PUBLIC: void __log_env_destroy __P((DB_ENV *));
+ */
+void
+__log_env_destroy(dbenv)
+	DB_ENV *dbenv;
+{
+	/* Intentionally empty apart from silencing "unused" warnings. */
+	COMPQUIET(dbenv, NULL);
+}
+
+/*
+ * __log_get_lg_bsize --
+ *	Return the log buffer size: the region's value when logging is on,
+ *	otherwise the value stored in the DB_ENV.
+ *
+ * PUBLIC: int __log_get_lg_bsize __P((DB_ENV *, u_int32_t *));
+ */
+int
+__log_get_lg_bsize(dbenv, lg_bsizep)
+	DB_ENV *dbenv;
+	u_int32_t *lg_bsizep;
+{
+	ENV *env;
+	LOG *lp;
+
+	env = dbenv->env;
+
+	ENV_NOT_CONFIGURED(env,
+	    env->lg_handle, "DB_ENV->get_lg_bsize", DB_INIT_LOG);
+
+	if (!LOGGING_ON(env)) {
+		*lg_bsizep = dbenv->lg_bsize;
+		return (0);
+	}
+
+	/* Cannot be set after open, no lock required to read. */
+	lp = env->lg_handle->reginfo.primary;
+	*lg_bsizep = lp->buffer_size;
+	return (0);
+}
+
+/*
+ * __log_set_lg_bsize --
+ *	DB_ENV->set_lg_bsize: record the requested log buffer size in the
+ *	DB_ENV.  Not permitted once the environment has been opened.
+ *
+ * PUBLIC: int __log_set_lg_bsize __P((DB_ENV *, u_int32_t));
+ */
+int
+__log_set_lg_bsize(dbenv, lg_bsize)
+	DB_ENV *dbenv;
+	u_int32_t lg_bsize;
+{
+	ENV_ILLEGAL_AFTER_OPEN(dbenv->env, "DB_ENV->set_lg_bsize");
+
+	dbenv->lg_bsize = lg_bsize;
+	return (0);
+}
+
+/*
+ * __log_get_lg_filemode --
+ *	Return the log file mode: read from the region under the log system
+ *	lock when logging is on, otherwise taken from the DB_ENV.
+ *
+ * PUBLIC: int __log_get_lg_filemode __P((DB_ENV *, int *));
+ */
+int
+__log_get_lg_filemode(dbenv, lg_modep)
+	DB_ENV *dbenv;
+	int *lg_modep;
+{
+	DB_LOG *dblp;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+
+	env = dbenv->env;
+
+	ENV_NOT_CONFIGURED(env,
+	    env->lg_handle, "DB_ENV->get_lg_filemode", DB_INIT_LOG);
+
+	if (!LOGGING_ON(env)) {
+		*lg_modep = dbenv->lg_filemode;
+		return (0);
+	}
+
+	dblp = env->lg_handle;
+	ENV_ENTER(env, ip);
+	LOG_SYSTEM_LOCK(env);
+	*lg_modep = ((LOG *)dblp->reginfo.primary)->filemode;
+	LOG_SYSTEM_UNLOCK(env);
+	ENV_LEAVE(env, ip);
+
+	return (0);
+}
+
+/*
+ * __log_set_lg_filemode --
+ *	DB_ENV->set_lg_filemode: store the log file mode in the region
+ *	(under the log system lock) when logging is on, otherwise in the
+ *	DB_ENV.
+ *
+ * PUBLIC: int __log_set_lg_filemode __P((DB_ENV *, int));
+ */
+int
+__log_set_lg_filemode(dbenv, lg_mode)
+	DB_ENV *dbenv;
+	int lg_mode;
+{
+	DB_LOG *dblp;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	LOG *lp;
+
+	env = dbenv->env;
+
+	ENV_NOT_CONFIGURED(env,
+	    env->lg_handle, "DB_ENV->set_lg_filemode", DB_INIT_LOG);
+
+	if (!LOGGING_ON(env)) {
+		dbenv->lg_filemode = lg_mode;
+		return (0);
+	}
+
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	ENV_ENTER(env, ip);
+	LOG_SYSTEM_LOCK(env);
+	lp->filemode = lg_mode;
+	LOG_SYSTEM_UNLOCK(env);
+	ENV_LEAVE(env, ip);
+
+	return (0);
+}
+
+/*
+ * __log_get_lg_max --
+ *	Return the configured log file size: the region's log_nsize (read
+ *	under the log system lock) when logging is on, otherwise the
+ *	DB_ENV's lg_size.
+ *
+ * PUBLIC: int __log_get_lg_max __P((DB_ENV *, u_int32_t *));
+ */
+int
+__log_get_lg_max(dbenv, lg_maxp)
+	DB_ENV *dbenv;
+	u_int32_t *lg_maxp;
+{
+	DB_LOG *dblp;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+
+	env = dbenv->env;
+
+	ENV_NOT_CONFIGURED(env,
+	    env->lg_handle, "DB_ENV->get_lg_max", DB_INIT_LOG);
+
+	if (!LOGGING_ON(env)) {
+		*lg_maxp = dbenv->lg_size;
+		return (0);
+	}
+
+	dblp = env->lg_handle;
+	ENV_ENTER(env, ip);
+	LOG_SYSTEM_LOCK(env);
+	*lg_maxp = ((LOG *)dblp->reginfo.primary)->log_nsize;
+	LOG_SYSTEM_UNLOCK(env);
+	ENV_LEAVE(env, ip);
+
+	return (0);
+}
+
+/*
+ * __log_set_lg_max --
+ *	DB_ENV->set_lg_max: set the log file size.  With logging on, the
+ *	value is first checked by __log_check_sizes and then stored in the
+ *	region under the log system lock; otherwise it is stored in the
+ *	DB_ENV without any check.
+ *
+ * PUBLIC: int __log_set_lg_max __P((DB_ENV *, u_int32_t));
+ */
+int
+__log_set_lg_max(dbenv, lg_max)
+	DB_ENV *dbenv;
+	u_int32_t lg_max;
+{
+	DB_LOG *dblp;
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	LOG *lp;
+	int ret;
+
+	env = dbenv->env;
+
+	ENV_NOT_CONFIGURED(env,
+	    env->lg_handle, "DB_ENV->set_lg_max", DB_INIT_LOG);
+
+	if (!LOGGING_ON(env)) {
+		dbenv->lg_size = lg_max;
+		return (0);
+	}
+
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	ENV_ENTER(env, ip);
+	ret = __log_check_sizes(env, lg_max, 0);
+	if (ret == 0) {
+		LOG_SYSTEM_LOCK(env);
+		lp->log_nsize = lg_max;
+		LOG_SYSTEM_UNLOCK(env);
+	}
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * __log_get_lg_regionmax --
+ *	Return the log region size limit: the region's value when logging
+ *	is on, otherwise the value stored in the DB_ENV.
+ *
+ * PUBLIC: int __log_get_lg_regionmax __P((DB_ENV *, u_int32_t *));
+ */
+int
+__log_get_lg_regionmax(dbenv, lg_regionmaxp)
+	DB_ENV *dbenv;
+	u_int32_t *lg_regionmaxp;
+{
+	ENV *env;
+	LOG *lp;
+
+	env = dbenv->env;
+
+	ENV_NOT_CONFIGURED(env,
+	    env->lg_handle, "DB_ENV->get_lg_regionmax", DB_INIT_LOG);
+
+	if (!LOGGING_ON(env)) {
+		*lg_regionmaxp = dbenv->lg_regionmax;
+		return (0);
+	}
+
+	/* Cannot be set after open, no lock required to read. */
+	lp = env->lg_handle->reginfo.primary;
+	*lg_regionmaxp = lp->regionmax;
+	return (0);
+}
+
+/*
+ * __log_set_lg_regionmax --
+ *	DB_ENV->set_lg_regionmax: record the log region size limit.  Zero
+ *	is accepted as-is; any other value below LG_BASE_REGION_SIZE is
+ *	rejected with EINVAL.  Not permitted after the environment is open.
+ *
+ * PUBLIC: int __log_set_lg_regionmax __P((DB_ENV *, u_int32_t));
+ */
+int
+__log_set_lg_regionmax(dbenv, lg_regionmax)
+	DB_ENV *dbenv;
+	u_int32_t lg_regionmax;
+{
+	ENV *env;
+
+	env = dbenv->env;
+
+	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lg_regionmax");
+
+	if (lg_regionmax == 0 || lg_regionmax >= LG_BASE_REGION_SIZE) {
+		dbenv->lg_regionmax = lg_regionmax;
+		return (0);
+	}
+
+	/* Let's not be silly. */
+	__db_errx(env, DB_STR_A("2569",
+	    "log region size must be >= %d",
+	    "%d"), LG_BASE_REGION_SIZE);
+	return (EINVAL);
+}
+
+/*
+ * __log_get_lg_dir --
+ *	Return the DB_ENV's log directory string.  The pointer stored in
+ *	*dirp is the DB_ENV's own db_log_dir field, not a copy.
+ *
+ * PUBLIC: int __log_get_lg_dir __P((DB_ENV *, const char **));
+ */
+int
+__log_get_lg_dir(dbenv, dirp)
+	DB_ENV *dbenv;
+	const char **dirp;
+{
+	/* Hand back the stored pointer itself. */
+	*dirp = dbenv->db_log_dir;
+
+	return (0);
+}
+
+/*
+ * __log_set_lg_dir --
+ *	DB_ENV->set_lg_dir: replace the DB_ENV's log directory with a
+ *	private copy of dir.  Returns the result of __os_strdup.
+ *
+ * PUBLIC: int __log_set_lg_dir __P((DB_ENV *, const char *));
+ */
+int
+__log_set_lg_dir(dbenv, dir)
+	DB_ENV *dbenv;
+	const char *dir;
+{
+	int ret;
+
+	/* Release whatever an earlier call stored. */
+	if (dbenv->db_log_dir != NULL)
+		__os_free(dbenv->env, dbenv->db_log_dir);
+
+	ret = __os_strdup(dbenv->env, dir, &dbenv->db_log_dir);
+	return (ret);
+}
+
+/*
+ * __log_get_flags --
+ *	DB_ENV->get_flags: make the DB_LOG_AUTO_REMOVE and DB_LOG_IN_MEMORY
+ *	bits of *flagsp reflect the log region's current settings.  Other
+ *	bits are preserved.  *flagsp is left untouched when there is no log
+ *	handle.
+ *
+ * PUBLIC: void __log_get_flags __P((DB_ENV *, u_int32_t *));
+ */
+void
+__log_get_flags(dbenv, flagsp)
+	DB_ENV *dbenv;
+	u_int32_t *flagsp;
+{
+	DB_LOG *dblp;
+	LOG *lp;
+	u_int32_t flags;
+
+	dblp = dbenv->env->lg_handle;
+	if (dblp == NULL)
+		return;
+	lp = dblp->reginfo.primary;
+
+	/* The LF_* macros operate on the local named "flags". */
+	flags = *flagsp;
+
+	if (lp->db_log_inmemory) {
+		LF_SET(DB_LOG_IN_MEMORY);
+	} else {
+		LF_CLR(DB_LOG_IN_MEMORY);
+	}
+
+	if (lp->db_log_autoremove) {
+		LF_SET(DB_LOG_AUTO_REMOVE);
+	} else {
+		LF_CLR(DB_LOG_AUTO_REMOVE);
+	}
+
+	*flagsp = flags;
+}
+
+/*
+ * __log_set_flags --
+ *	DB_ENV->set_flags: for each of DB_LOG_AUTO_REMOVE and
+ *	DB_LOG_IN_MEMORY present in flags, set the matching region field to
+ *	1 or 0 according to "on".  Does nothing when there is no log handle.
+ *
+ * PUBLIC: void __log_set_flags __P((ENV *, u_int32_t, int));
+ */
+void
+__log_set_flags(env, flags, on)
+	ENV *env;
+	u_int32_t flags;
+	int on;
+{
+	DB_LOG *dblp;
+	LOG *lp;
+	int value;
+
+	dblp = env->lg_handle;
+	if (dblp == NULL)
+		return;
+	lp = dblp->reginfo.primary;
+
+	/* Normalize any non-zero "on" to exactly 1. */
+	value = on ? 1 : 0;
+
+	if (LF_ISSET(DB_LOG_IN_MEMORY))
+		lp->db_log_inmemory = value;
+	if (LF_ISSET(DB_LOG_AUTO_REMOVE))
+		lp->db_log_autoremove = value;
+}
+
+/*
+ * List of flags we can handle here. DB_LOG_INMEMORY must be
+ * processed before creating the region, leave it out for now.
+ *
+ * NOTE(review): despite the sentence above, OK_FLAGS as defined below
+ * does include DB_LOG_IN_MEMORY -- confirm which is intended.
+ *
+ * LogMap pairs each public DB_LOG_* configuration flag with a DBLOG_*
+ * flag; it is handed to __env_fetch_flags and __env_map_flags by the
+ * log_get_config/log_set_config code in this file.
+ */
+#undef OK_FLAGS
+#define OK_FLAGS \
+ (DB_LOG_AUTO_REMOVE | DB_LOG_DIRECT | \
+ DB_LOG_DSYNC | DB_LOG_IN_MEMORY | DB_LOG_ZERO)
+static const FLAG_MAP LogMap[] = {
+ { DB_LOG_AUTO_REMOVE, DBLOG_AUTOREMOVE},
+ { DB_LOG_DIRECT, DBLOG_DIRECT},
+ { DB_LOG_DSYNC, DBLOG_DSYNC},
+ { DB_LOG_IN_MEMORY, DBLOG_INMEMORY},
+ { DB_LOG_ZERO, DBLOG_ZERO}
+};
+/*
+ * __log_get_config --
+ *	Report whether a logging configuration flag is currently on.
+ *	*onp is set to 1 when any bit of "which" is set in the combined
+ *	handle/region flags, else 0.  Flags outside OK_FLAGS are rejected
+ *	through __db_ferr.
+ *
+ * PUBLIC: int __log_get_config __P((DB_ENV *, u_int32_t, int *));
+ */
+int
+__log_get_config(dbenv, which, onp)
+	DB_ENV *dbenv;
+	u_int32_t which;
+	int *onp;
+{
+	ENV *env;
+	DB_LOG *dblp;
+	u_int32_t flags;
+
+	env = dbenv->env;
+	if (FLD_ISSET(which, ~OK_FLAGS))
+		return (__db_ferr(env, "DB_ENV->log_get_config", 0));
+
+	dblp = env->lg_handle;
+	ENV_REQUIRES_CONFIG(env, dblp, "DB_ENV->log_get_config", DB_INIT_LOG);
+
+	/* Start from the handle's flags, then overlay the region's. */
+	__env_fetch_flags(LogMap, sizeof(LogMap), &dblp->flags, &flags);
+	__log_get_flags(dbenv, &flags);
+
+	*onp = LF_ISSET(which) ? 1 : 0;
+	return (0);
+}
+
+/*
+ * __log_set_config --
+ *	DB_ENV->log_set_config entry point: forwards to
+ *	__log_set_config_int with in_open set to 0.
+ *
+ * PUBLIC: int __log_set_config __P((DB_ENV *, u_int32_t, int));
+ */
+int
+__log_set_config(dbenv, flags, on)
+	DB_ENV *dbenv;
+	u_int32_t flags;
+	int on;
+{
+	int ret;
+
+	ret = __log_set_config_int(dbenv, flags, on, 0);
+	return (ret);
+}
+/*
+ * __log_set_config_int --
+ *	Turn logging configuration flags on or off.  With logging on, the
+ *	change is applied to the region and to the DB_LOG handle's flags;
+ *	otherwise it is recorded in the DB_ENV's lg_flags.  in_open is
+ *	non-zero to skip the "illegal after open" check for
+ *	DB_LOG_IN_MEMORY.
+ *
+ * PUBLIC: int __log_set_config_int __P((DB_ENV *, u_int32_t, int, int));
+ */
+int
+__log_set_config_int(dbenv, flags, on, in_open)
+	DB_ENV *dbenv;
+	u_int32_t flags;
+	int on;
+	int in_open;
+{
+	ENV *env;
+	DB_LOG *dblp;
+	u_int32_t mapped_flags;
+
+	env = dbenv->env;
+	dblp = env->lg_handle;
+
+	if (FLD_ISSET(flags, ~OK_FLAGS))
+		return (__db_ferr(env, "DB_ENV->log_set_config", 0));
+	ENV_NOT_CONFIGURED(env, dblp, "DB_ENV->log_set_config", DB_INIT_LOG);
+	if (LF_ISSET(DB_LOG_DIRECT) && __os_support_direct_io() == 0) {
+		__db_errx(env,
+"DB_ENV->log_set_config: direct I/O either not configured or not supported");
+		return (EINVAL);
+	}
+
+	if (!LOGGING_ON(env)) {
+		if (on) {
+			/*
+			 * DB_LOG_IN_MEMORY, DB_TXN_NOSYNC and
+			 * DB_TXN_WRITE_NOSYNC are mutually incompatible.
+			 * If we're setting one of them, clear all current
+			 * settings.
+			 */
+			if (LF_ISSET(DB_LOG_IN_MEMORY))
+				F_CLR(dbenv, DB_ENV_TXN_NOSYNC |
+				    DB_ENV_TXN_WRITE_NOSYNC);
+			FLD_SET(dbenv->lg_flags, flags);
+		} else
+			FLD_CLR(dbenv->lg_flags, flags);
+		return (0);
+	}
+
+	/* In-memory logging that is off can't be touched outside of open. */
+	if (!in_open && LF_ISSET(DB_LOG_IN_MEMORY) &&
+	    ((LOG *)dblp->reginfo.primary)->db_log_inmemory == 0) {
+		ENV_ILLEGAL_AFTER_OPEN(env,
+		    "DB_ENV->log_set_config: DB_LOG_IN_MEMORY");
+	}
+
+	__log_set_flags(env, flags, on);
+
+	mapped_flags = 0;
+	__env_map_flags(LogMap, sizeof(LogMap), &flags, &mapped_flags);
+	if (on) {
+		F_SET(dblp, mapped_flags);
+	} else {
+		F_CLR(dblp, mapped_flags);
+	}
+
+	return (0);
+}
+
+/*
+ * __log_check_sizes --
+ *	Check that the log file size and log buffer size are compatible.
+ *	Only in-memory logging is constrained: the buffer must be strictly
+ *	larger than the log file size, with zero values replaced by the
+ *	in-memory defaults.  When logging is on, the region's buffer size
+ *	is used in place of the lg_bsize argument.
+ *
+ *	Returns 0 if the sizes are acceptable, EINVAL otherwise.
+ *
+ * PUBLIC: int __log_check_sizes __P((ENV *, u_int32_t, u_int32_t));
+ */
+int
+__log_check_sizes(env, lg_max, lg_bsize)
+	ENV *env;
+	u_int32_t lg_max;
+	u_int32_t lg_bsize;
+{
+	LOG *lp;
+	int inmem;
+
+	if (LOGGING_ON(env)) {
+		lp = env->lg_handle->reginfo.primary;
+		lg_bsize = lp->buffer_size;
+		inmem = lp->db_log_inmemory;
+	} else
+		inmem =
+		    (FLD_ISSET(env->dbenv->lg_flags, DB_LOG_IN_MEMORY) != 0);
+
+	/* On-disk logs have no size relationship to enforce. */
+	if (!inmem)
+		return (0);
+
+	if (lg_bsize == 0)
+		lg_bsize = LG_BSIZE_INMEM;
+	if (lg_max == 0)
+		lg_max = LG_MAX_INMEM;
+
+	if (lg_bsize > lg_max)
+		return (0);
+
+	__db_errx(env,
+	    "in-memory log buffer must be larger than the log file size");
+	return (EINVAL);
+}
diff --git a/src/log/log_print.c b/src/log/log_print.c
new file mode 100644
index 00000000..d2cda519
--- /dev/null
+++ b/src/log/log_print.c
@@ -0,0 +1,380 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/lock.h"
+
+static int __log_print_dbregister __P((ENV *, DBT *, DB_LOG *));
+
+/*
+ * __log_print_record --
+ * Format one log record as text, field by field as described by spec
+ * (terminated by LOGREC_Done), and emit it through the DB_MSGBUF.
+ *
+ * info is used as a DB_LOG handle and may be NULL. When non-NULL, its
+ * dbentry table supplies the DB handle used to decide on byte swapping,
+ * and it is passed to __log_print_dbregister for LOGREC_DBOP fields.
+ *
+ * Returns 0, or the error from __log_print_dbregister, __db_pageswap,
+ * __os_malloc or __db_prpage_int.
+ *
+ * NOTE(review): the early error returns do not flush msgbuf, and the
+ * ones following the __os_malloc calls do not release hdrtmp/datatmp.
+ * The result of localtime() is used without a NULL check. A negative
+ * file id read for LOGREC_DB passes the dbentry_cnt test as written.
+ *
+ * PUBLIC: int __log_print_record __P((ENV *,
+ * PUBLIC: DBT *, DB_LSN *, char *, DB_LOG_RECSPEC *, void *));
+ */
+int
+__log_print_record(env, recbuf, lsnp, name, spec, info)
+ ENV *env;
+ DBT *recbuf;
+ DB_LSN *lsnp;
+ char *name;
+ DB_LOG_RECSPEC *spec;
+ void *info;
+{
+ DB *dbp;
+ DBT dbt;
+ DB_LOG_RECSPEC *sp, *np;
+ DB_LOG *dblp;
+ DB_LSN prev_lsn;
+ DB_MSGBUF msgbuf;
+ LOG *lp;
+ PAGE *hdrstart, *hdrtmp;
+ int32_t inttmp;
+ u_int32_t hdrsize, op, uinttmp;
+ u_int32_t type, txnid;
+ u_int8_t *bp, *datatmp;
+ int has_data, ret, downrev;
+ struct tm *lt;
+ time_t timeval;
+ char time_buf[CTIME_BUFLEN], *s;
+ const char *hdrname;
+
+ COMPQUIET(hdrstart, NULL);
+ COMPQUIET(hdrname, NULL);
+ COMPQUIET(hdrsize, 0);
+ COMPQUIET(has_data, 0);
+ COMPQUIET(op, 0);
+
+ bp = recbuf->data;
+ dblp = info;
+ dbp = NULL;
+ lp = env->lg_handle->reginfo.primary;
+ downrev = lp->persist.version < DB_LOGVERSION_50;
+ DB_MSGBUF_INIT(&msgbuf);
+
+ /*
+ * The first three fields are always the same in every arg
+ * struct so we know their offsets.
+ */
+ /* type */
+ LOGCOPY_32(env, &type, bp);
+ bp += sizeof(u_int32_t);
+
+ /* txnp */
+ LOGCOPY_32(env, &txnid, bp);
+ bp += sizeof(txnid);
+
+ /* Previous LSN */
+ LOGCOPY_TOLSN(env,&prev_lsn, bp);
+ bp += sizeof(DB_LSN);
+ __db_msgadd(env, &msgbuf,
+ "[%lu][%lu]%s%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ name, (type & DB_debug_FLAG) ? "_debug" : "",
+ (u_long)type,
+ (u_long)txnid,
+ (u_long)prev_lsn.file, (u_long)prev_lsn.offset);
+
+ for (sp = spec; sp->type != LOGREC_Done; sp++) {
+ switch (sp->type) {
+ case LOGREC_OP:
+ LOGCOPY_32(env, &op, bp);
+ __db_msgadd(env, &msgbuf, "\t%s: ", sp->name);
+ __db_msgadd(env, &msgbuf, sp->fmt, OP_MODE_GET(op));
+ __db_msgadd(env, &msgbuf, " ptype: %s\n",
+ __db_pagetype_to_string(OP_PAGE_GET(op)));
+ bp += sizeof(uinttmp);
+ break;
+ case LOGREC_DB:
+ LOGCOPY_32(env, &inttmp, bp);
+ __db_msgadd(env, &msgbuf, "\t%s: %lu\n",
+ sp->name, (unsigned long)inttmp);
+ bp += sizeof(inttmp);
+ if (dblp != NULL && inttmp < dblp->dbentry_cnt)
+ dbp = dblp->dbentry[inttmp].dbp;
+ break;
+
+ case LOGREC_DBOP:
+ /* Special op for dbreg_register records. */
+ if (dblp != NULL && (ret =
+ __log_print_dbregister(env, recbuf, dblp)) != 0)
+ return (ret);
+ LOGCOPY_32(env, &uinttmp, bp);
+ switch (FLD_ISSET(uinttmp, DBREG_OP_MASK)) {
+ case DBREG_CHKPNT:
+ s = "CHKPNT";
+ break;
+ case DBREG_CLOSE:
+ s = "CLOSE";
+ break;
+ case DBREG_OPEN:
+ s = "OPEN";
+ break;
+ case DBREG_PREOPEN:
+ s = "PREOPEN";
+ break;
+ case DBREG_RCLOSE:
+ s = "RCLOSE";
+ break;
+ case DBREG_REOPEN:
+ s = "REOPEN";
+ break;
+ case DBREG_XCHKPNT:
+ s = "XCHKPNT";
+ break;
+ case DBREG_XOPEN:
+ s = "XOPEN";
+ break;
+ case DBREG_XREOPEN:
+ s = "XREOPEN";
+ break;
+ default:
+ s = "UNKNOWN";
+ break;
+ }
+ __db_msgadd(env, &msgbuf, "\t%s: %s %lx\n", sp->name,
+ s, (unsigned long)(uinttmp & ~DBREG_OP_MASK));
+ bp += sizeof(uinttmp);
+ break;
+ case LOGREC_ARG:
+ LOGCOPY_32(env, &uinttmp, bp);
+ __db_msgadd(env, &msgbuf, "\t%s: ", sp->name);
+ __db_msgadd(env, &msgbuf, sp->fmt, uinttmp);
+ __db_msgadd(env, &msgbuf, "\n");
+ bp += sizeof(uinttmp);
+ break;
+ case LOGREC_TIME:
+ /* time_t is long but we only store 32 bits. */
+ LOGCOPY_32(env, &uinttmp, bp);
+ timeval = uinttmp;
+ lt = localtime(&timeval);
+ __db_msgadd(env, &msgbuf,
+ "\t%s: %ld (%.24s, 20%02lu%02lu%02lu%02lu%02lu.%02lu)\n",
+ sp->name, (long)timeval,
+ __os_ctime(&timeval, time_buf),
+ (u_long)lt->tm_year - 100, (u_long)lt->tm_mon+1,
+ (u_long)lt->tm_mday, (u_long)lt->tm_hour,
+ (u_long)lt->tm_min, (u_long)lt->tm_sec);
+ bp += sizeof(uinttmp);
+ break;
+ case LOGREC_PGDBT:
+ case LOGREC_PGDDBT:
+ case LOGREC_PGLIST:
+ case LOGREC_LOCKS:
+ case LOGREC_HDR:
+ case LOGREC_DATA:
+ case LOGREC_DBT:
+ LOGCOPY_32(env, &uinttmp, bp);
+ bp += sizeof(u_int32_t);
+ switch (sp->type) {
+ case LOGREC_HDR:
+ if (uinttmp == 0)
+ break;
+ has_data = 0;
+ for (np = sp + 1; np->type != LOGREC_Done; np++)
+ if (np->type == LOGREC_DATA) {
+ has_data = 1;
+ break;
+ }
+
+ hdrstart = (PAGE*)bp;
+ hdrsize = uinttmp;
+ hdrname = sp->name;
+ if (has_data == 1)
+ break;
+ /* FALLTHROUGH */
+ case LOGREC_DATA:
+ if (downrev ? LOG_SWAPPED(env) :
+ (dbp != NULL && F_ISSET(dbp, DB_AM_SWAP)))
+ __db_recordswap(op, hdrsize, hdrstart,
+ (has_data && uinttmp != 0) ?
+ bp : NULL, 1);
+ __db_msgadd(env, &msgbuf, "\t%s: ", hdrname);
+ __db_prbytes(env, &msgbuf,
+ (u_int8_t *)hdrstart, hdrsize);
+ if (has_data == 0 || uinttmp == 0)
+ break;
+ /* FALLTHROUGH */
+ default:
+ __db_msgadd(env, &msgbuf, "\t%s: ", sp->name);
+ pr_data:
+ __db_prbytes(env, &msgbuf, bp, uinttmp);
+ has_data = 0;
+ break;
+ case LOGREC_PGDBT:
+ has_data = 0;
+ for (np = sp + 1; np->type != LOGREC_Done; np++)
+ if (np->type == LOGREC_PGDDBT) {
+ has_data = 1;
+ break;
+ }
+
+ hdrstart = (PAGE*)bp;
+ hdrsize = uinttmp;
+ if (has_data == 1)
+ break;
+ /* FALLTHROUGH */
+ case LOGREC_PGDDBT:
+ DB_ASSERT(env, hdrstart != NULL);
+ if (dbp != NULL && (downrev ? LOG_SWAPPED(env) :
+ F_ISSET(dbp, DB_AM_SWAP))) {
+ dbt.data = bp;
+ dbt.size = uinttmp;
+ if ((ret = __db_pageswap(env, dbp,
+ hdrstart, hdrsize, has_data == 0 ?
+ NULL : &dbt, 1)) != 0)
+ return (ret);
+ }
+ if (downrev)
+ goto pr_data;
+ if (ALIGNP_INC(hdrstart,
+ sizeof(u_int32_t)) != hdrstart) {
+ if ((ret = __os_malloc(env,
+ hdrsize, &hdrtmp)) != 0)
+ return (ret);
+ memcpy(hdrtmp, hdrstart, hdrsize);
+ } else
+ hdrtmp = hdrstart;
+ if (has_data == 1 && ALIGNP_INC(bp,
+ sizeof(u_int32_t)) != bp) {
+ if ((ret = __os_malloc(env,
+ uinttmp, &datatmp)) != 0)
+ return (ret);
+ memcpy(datatmp, bp, uinttmp);
+ } else if (has_data == 1)
+ datatmp = bp;
+ else
+ datatmp = NULL;
+ if ((ret = __db_prpage_int(env, &msgbuf,
+ dbp, "\t", hdrtmp,
+ uinttmp, datatmp, DB_PR_PAGE)) != 0)
+ return (ret);
+ has_data = 0;
+ if (hdrtmp != hdrstart)
+ __os_free(env, hdrtmp);
+ if (datatmp != bp && datatmp != NULL)
+ __os_free(env, datatmp);
+ break;
+ case LOGREC_PGLIST:
+ dbt.data = bp;
+ dbt.size = uinttmp;
+ __db_pglist_print(env, &msgbuf, &dbt);
+ break;
+ case LOGREC_LOCKS:
+ dbt.data = bp;
+ dbt.size = uinttmp;
+ __lock_list_print(env, &msgbuf, &dbt);
+ break;
+ }
+ bp += uinttmp;
+ break;
+
+ case LOGREC_POINTER:
+ LOGCOPY_TOLSN(env, &prev_lsn, bp);
+ __db_msgadd(env, &msgbuf,
+ "\t%s: [%lu][%lu]\n", sp->name,
+ (u_long)prev_lsn.file, (u_long)prev_lsn.offset);
+ bp += sizeof(DB_LSN);
+ break;
+ case LOGREC_Done:
+ DB_ASSERT(env, sp->type != LOGREC_Done);
+ }
+ }
+ if (msgbuf.buf != NULL)
+ DB_MSGBUF_FLUSH(env, &msgbuf);
+ else
+ __db_msg(env, "%s", "");
+ return (0);
+}
+
+/*
+ * __log_print_dbregister --
+ *	So that we can properly swap and print information from databases
+ * we generate dummy DB handles here.  These are real handles that are never
+ * opened but their fileid, meta_pgno and some flags are set properly.
+ * This code uses parallel structures to those in the dbregister code.
+ * The DB_LOG handle passed in must NOT be the real environment handle
+ * since this would confuse actual running transactions if printing is
+ * done while the environment is active.
+ *
+ *	env	Environment used for allocation and handle creation.
+ *	recbuf	The dbreg_register log record; recbuf->data is parsed with
+ *		__dbreg_register_read.
+ *	dblp	Private DB_LOG whose dbentry table is updated for the
+ *		record's fileid.
+ *
+ *	Returns 0 on success, or the error from __dbreg_register_read,
+ *	__dbreg_add_dbentry, __db_create_internal or __db_close.  A close
+ *	record for a fileid with no handle is not treated as an error.
+ */
+static int
+__log_print_dbregister(env, recbuf, dblp)
+	ENV *env;
+	DBT *recbuf;
+	DB_LOG *dblp;
+{
+	__dbreg_register_args *argp;
+	DB *dbp;
+	DB_ENTRY *dbe;
+	int ret;
+
+	if ((ret = __dbreg_register_read(env, recbuf->data, &argp)) != 0)
+		return (ret);
+
+	/* Grow the table if this fileid has not been seen before. */
+	if (dblp->dbentry_cnt <= argp->fileid &&
+	    (ret = __dbreg_add_dbentry(env, dblp, NULL, argp->fileid)) != 0)
+		goto err;
+	dbe = &dblp->dbentry[argp->fileid];
+	dbp = dbe->dbp;
+
+	switch (FLD_ISSET(argp->opcode, DBREG_OP_MASK)) {
+	case DBREG_CHKPNT:
+	case DBREG_OPEN:
+	case DBREG_REOPEN:
+	case DBREG_XCHKPNT:
+	case DBREG_XOPEN:
+	case DBREG_XREOPEN:
+		if (dbp != NULL) {
+			/* Already tracking this exact file: nothing to do. */
+			if (memcmp(dbp->fileid,
+			    argp->uid.data, DB_FILE_ID_LEN) == 0 &&
+			    dbp->meta_pgno == argp->meta_pgno)
+				goto done;
+			/*
+			 * The slot is being reused for a different file.
+			 * Clear the table entry before closing: per the
+			 * DB->close contract a handle may not be used again
+			 * once close has been called, whatever it returns,
+			 * so the entry must not keep pointing at it.
+			 */
+			dbe->dbp = NULL;
+			if ((ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0)
+				goto err;
+			dbp = NULL;
+		}
+		if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
+			goto err;
+		memcpy(dbp->fileid, argp->uid.data, DB_FILE_ID_LEN);
+		dbp->meta_pgno = argp->meta_pgno;
+		F_SET(dbp, DB_AM_RECOVER);
+		/*
+		 * We need to swap bytes if we are on a BIGEND machine XOR
+		 * we have a BIGEND database.
+		 */
+		if ((F_ISSET(env, ENV_LITTLEENDIAN) == 0) ^
+		    (FLD_ISSET(argp->opcode, DBREG_BIGEND) != 0))
+			F_SET(dbp, DB_AM_SWAP);
+		if (FLD_ISSET(argp->opcode, DBREG_CHKSUM))
+			F_SET(dbp, DB_AM_CHKSUM);
+		if (FLD_ISSET(argp->opcode, DBREG_ENCRYPT))
+			F_SET(dbp, DB_AM_ENCRYPT);
+		if (FLD_ISSET(argp->opcode, DBREG_EXCL))
+			F2_SET(dbp, DB2_AM_EXCL);
+		dbe->dbp = dbp;
+		break;
+	case DBREG_CLOSE:
+	case DBREG_RCLOSE:
+		/*
+		 * No handle for this id: ret is still 0 at this point, so
+		 * the record is simply skipped.
+		 */
+		if (dbp == NULL)
+			goto err;
+		/* As above: forget the handle before closing it. */
+		dbe->dbp = NULL;
+		if ((ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0)
+			goto err;
+		dbp = NULL;
+		break;
+	case DBREG_PREOPEN:
+		break;
+	default:
+		DB_ASSERT(env, argp->opcode != argp->opcode);
+	}
+done:
+err:
+	__os_free(env, argp);
+	return (ret);
+}
diff --git a/src/log/log_put.c b/src/log/log_put.c
new file mode 100644
index 00000000..8f7e23d8
--- /dev/null
+++ b/src/log/log_put.c
@@ -0,0 +1,2041 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/hmac.h"
+#include "dbinc/log.h"
+#include "dbinc/txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc_auto/db_ext.h"
+
+static int __log_encrypt_record __P((ENV *, DBT *, HDR *, u_int32_t));
+static int __log_file __P((ENV *, const DB_LSN *, char *, size_t));
+static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
+static int __log_flush_commit __P((ENV *, const DB_LSN *, u_int32_t));
+static int __log_newfh __P((DB_LOG *, int));
+static int __log_put_next __P((ENV *,
+ DB_LSN *, const DBT *, HDR *, DB_LSN *));
+static int __log_put_record_int __P((ENV *, DB *, DB_TXN *, DB_LSN *,
+ u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, va_list));
+static int __log_putr __P((DB_LOG *,
+ DB_LSN *, const DBT *, u_int32_t, HDR *));
+static int __log_write __P((DB_LOG *, void *, u_int32_t));
+
+/*
+ * __log_put_pp --
+ *	Argument checking and thread/replication bracketing for
+ *	DB_ENV->log_put; the real work is done by __log_put.
+ *
+ * PUBLIC: int __log_put_pp __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
+ */
+int
+__log_put_pp(dbenv, lsnp, udbt, flags)
+	DB_ENV *dbenv;
+	DB_LSN *lsnp;
+	const DBT *udbt;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *thr;
+	ENV *env;
+	int rc;
+
+	env = dbenv->env;
+
+	/* The logging subsystem must be configured in this environment. */
+	ENV_REQUIRES_CONFIG(env,
+	    env->lg_handle, "DB_ENV->log_put", DB_INIT_LOG);
+
+	/* Reject any flag outside the supported set. */
+	rc = __db_fchk(env, "DB_ENV->log_put", flags,
+	    DB_LOG_CHKPNT | DB_LOG_COMMIT |
+	    DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_WRNOSYNC);
+	if (rc != 0)
+		return (rc);
+
+	/* A caller may ask for DB_FLUSH or DB_LOG_WRNOSYNC, never both. */
+	if (LF_ISSET(DB_FLUSH) && LF_ISSET(DB_LOG_WRNOSYNC))
+		return (__db_ferr(env, "DB_ENV->log_put", 1));
+
+	/* Log records may not be written on a replication client. */
+	if (IS_REP_CLIENT(env)) {
+		__db_errx(env, DB_STR("2511",
+		    "DB_ENV->log_put is illegal on replication clients"));
+		return (EINVAL);
+	}
+
+	ENV_ENTER(env, thr);
+	REPLICATION_WRAP(env, (__log_put(env, lsnp, udbt, flags)), 0, rc);
+	ENV_LEAVE(env, thr);
+	return (rc);
+}
+
+/*
+ * __log_put --
+ * ENV->log_put.
+ *
+ * Append the record in udbt to the log and store its LSN in *lsnp.
+ *
+ * The record is copied into freshly allocated memory unless DB_LOG_NOCOPY
+ * is set and this environment is not a replication master. It is then
+ * passed to __log_encrypt_record and (under HAVE_LOG_CHECKSUM) checksummed
+ * before the log region lock is taken. With the lock held the record is
+ * handed to __log_put_next.
+ *
+ * On a replication master (HAVE_REPLICATION) the lock is dropped so that a
+ * REP_NEWFILE message (if the log file changed) and the record itself can
+ * be sent to clients. Send failures are ignored, except that a failed send
+ * of a DB_LOG_COMMIT or DB_LOG_CHKPNT record forces DB_FLUSH.
+ *
+ * DB_FLUSH and DB_LOG_WRNOSYNC cause __log_flush_commit to be called. A
+ * failure in __log_put_next or __log_flush_commit on a replication master
+ * panics the environment. If the log file changed and auto-remove is
+ * configured, __log_autoremove is called before returning success.
+ *
+ * Returns 0 on success, otherwise the error that stopped the operation.
+ *
+ * PUBLIC: int __log_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t));
+ */
+int
+__log_put(env, lsnp, udbt, flags)
+ ENV *env;
+ DB_LSN *lsnp;
+ const DBT *udbt;
+ u_int32_t flags;
+{
+ DBT *dbt, t;
+ DB_CIPHER *db_cipher;
+ DB_LOG *dblp;
+ DB_LSN lsn, old_lsn;
+ DB_REP *db_rep;
+ HDR hdr;
+ LOG *lp;
+ REP *rep;
+ int lock_held, need_free, ret;
+ u_int8_t *key;
+
+ dblp = env->lg_handle;
+ lp = dblp->reginfo.primary;
+ db_cipher = env->crypto_handle;
+ db_rep = env->rep_handle;
+ if (db_rep != NULL)
+ rep = db_rep->region;
+ else
+ rep = NULL;
+
+ dbt = &t;
+ t = *udbt;
+ lock_held = need_free = 0;
+ ZERO_LSN(old_lsn);
+ hdr.len = hdr.prev = 0;
+
+ /*
+ * In general, if we are not a rep application, but are sharing a master
+ * rep env, we should not be writing log records. However, we can allow
+ * a non-replication-aware process to join a pre-existing repmgr
+ * environment, if env handle meets repmgr's DB_THREAD requirement.
+ */
+
+ if (IS_REP_MASTER(env) && db_rep->send == NULL) {
+#ifdef HAVE_REPLICATION_THREADS
+ if (F_ISSET(env, ENV_THREAD) && APP_IS_REPMGR(env)) {
+ if ((ret = __repmgr_autostart(env)) != 0)
+ return (ret);
+ } else
+#endif
+ {
+#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP)
+ __db_errx(env, DB_STR("2512",
+ "Non-replication DB_ENV handle attempting "
+ "to modify a replicated environment"));
+ return (EINVAL);
+#endif
+ }
+ }
+ DB_ASSERT(env, !IS_REP_CLIENT(env));
+
+ /*
+ * If we are coming from the logging code, we use an internal flag,
+ * DB_LOG_NOCOPY, because we know we can overwrite/encrypt the log
+ * record in place. Otherwise, if a user called log_put then we
+ * must copy it to new memory so that we know we can write it.
+ *
+ * We also must copy it to new memory if we are a replication master
+ * so that we retain an unencrypted copy of the log record to send
+ * to clients.
+ */
+ if (!LF_ISSET(DB_LOG_NOCOPY) || IS_REP_MASTER(env)) {
+ if (CRYPTO_ON(env))
+ t.size += db_cipher->adj_size(udbt->size);
+ if ((ret = __os_calloc(env, 1, t.size, &t.data)) != 0)
+ goto err;
+ need_free = 1;
+ memcpy(t.data, udbt->data, udbt->size);
+ }
+ if ((ret = __log_encrypt_record(env, dbt, &hdr, udbt->size)) != 0)
+ goto err;
+ if (CRYPTO_ON(env))
+ key = db_cipher->mac_key;
+ else
+ key = NULL;
+#ifdef HAVE_LOG_CHECKSUM
+ __db_chksum(&hdr, dbt->data, dbt->size, key, hdr.chksum);
+#endif
+
+ LOG_SYSTEM_LOCK(env);
+ lock_held = 1;
+
+ if ((ret = __log_put_next(env, &lsn, dbt, &hdr, &old_lsn)) != 0)
+ goto panic_check;
+
+ /*
+ * Assign the return LSN before dropping the region lock. Necessary
+ * in case the lsn is a begin_lsn from a TXN_DETAIL structure passed in
+ * by the logging routines. We use atomic 32-bit operations because
+ * during commit this will be a TXN_DETAIL visible_lsn field, and MVCC
+ * relies on reading the fields atomically.
+ */
+ lsnp->file = lsn.file;
+ lsnp->offset = lsn.offset;
+
+#ifdef HAVE_REPLICATION
+ if (IS_REP_MASTER(env)) {
+ __rep_newfile_args nf_args;
+ DBT newfiledbt;
+ REP_BULK bulk;
+ size_t len;
+ u_int32_t ctlflags;
+ u_int8_t buf[__REP_NEWFILE_SIZE];
+
+ /*
+ * Replication masters need to drop the lock to send messages,
+ * but want to drop and reacquire it a minimal number of times.
+ */
+ ctlflags = LF_ISSET(DB_LOG_COMMIT | DB_LOG_CHKPNT) ?
+ REPCTL_PERM : 0;
+ LOG_SYSTEM_UNLOCK(env);
+ lock_held = 0;
+ if (LF_ISSET(DB_FLUSH))
+ ctlflags |= REPCTL_FLUSH;
+
+ /*
+ * If we changed files and we're in a replicated environment,
+ * we need to inform our clients now that we've dropped the
+ * region lock.
+ *
+ * Note that a failed NEWFILE send is a dropped message that
+ * our client can handle, so we can ignore it. It's possible
+ * that the record we already put is a commit, so we don't just
+ * want to return failure.
+ */
+ if (!IS_ZERO_LSN(old_lsn)) {
+ memset(&newfiledbt, 0, sizeof(newfiledbt));
+ nf_args.version = lp->persist.version;
+ (void)__rep_newfile_marshal(env, &nf_args,
+ buf, __REP_NEWFILE_SIZE, &len);
+ DB_INIT_DBT(newfiledbt, buf, len);
+ (void)__rep_send_message(env, DB_EID_BROADCAST,
+ REP_NEWFILE, &old_lsn, &newfiledbt, 0, 0);
+ }
+
+ /*
+ * If we're doing bulk processing put it in the bulk buffer.
+ */
+ ret = 0;
+ if (FLD_ISSET(rep->config, REP_C_BULK)) {
+ /*
+ * Bulk could have been turned on by another process.
+ * If so, set the address into the bulk region now.
+ */
+ if (db_rep->bulk == NULL)
+ db_rep->bulk = R_ADDR(&dblp->reginfo,
+ lp->bulk_buf);
+ memset(&bulk, 0, sizeof(bulk));
+ bulk.addr = db_rep->bulk;
+ bulk.offp = &lp->bulk_off;
+ bulk.len = lp->bulk_len;
+ bulk.lsn = lsn;
+ bulk.type = REP_BULK_LOG;
+ bulk.eid = DB_EID_BROADCAST;
+ bulk.flagsp = &lp->bulk_flags;
+ ret = __rep_bulk_message(env, &bulk, NULL,
+ &lsn, udbt, ctlflags);
+ }
+ if (!FLD_ISSET(rep->config, REP_C_BULK) ||
+ ret == DB_REP_BULKOVF) {
+ /*
+ * Then send the log record itself on to our clients.
+ */
+ /*
+ * !!!
+ * In the crypto case, we MUST send the udbt, not the
+ * now-encrypted dbt. Clients have no way to decrypt
+ * without the header.
+ */
+ ret = __rep_send_message(env, DB_EID_BROADCAST,
+ REP_LOG, &lsn, udbt, ctlflags, 0);
+ }
+ if (FLD_ISSET(ctlflags, REPCTL_PERM)) {
+ LOG_SYSTEM_LOCK(env);
+#ifdef HAVE_STATISTICS
+ if (IS_USING_LEASES(env))
+ rep->stat.st_lease_sends++;
+#endif
+ /*
+ * Keep track of our last PERM lsn. Set this on a
+ * master under the log lock. When using leases, if
+ * we set max_perm_lsn too early (before the send)
+ * then we hit a lot of false invalid lease checks
+ * which all try to refresh and hurt performance.
+ */
+ if (LOG_COMPARE(&lp->max_perm_lsn, &lsn) < 0)
+ lp->max_perm_lsn = lsn;
+ LOG_SYSTEM_UNLOCK(env);
+ }
+ /*
+ * If the send fails and we're a commit or checkpoint,
+ * there's nothing we can do; the record's in the log.
+ * Flush it, even if we're running with TXN_NOSYNC,
+ * on the grounds that it should be in durable
+ * form somewhere.
+ */
+ if (ret != 0 && FLD_ISSET(ctlflags, REPCTL_PERM))
+ LF_SET(DB_FLUSH);
+ /*
+ * We ignore send failures so reset 'ret' to 0 here.
+ * We needed to check special return values from
+ * bulk transfer and errors from either bulk or normal
+ * message sending need flushing on perm records. But
+ * otherwise we need to ignore it and reset it now.
+ */
+ ret = 0;
+ }
+#endif
+
+ /*
+ * If needed, do a flush. Note that failures at this point
+ * are only permissible if we know we haven't written a commit
+ * record; __log_flush_commit is responsible for enforcing this.
+ *
+ * If a flush is not needed, see if WRITE_NOSYNC was set and we
+ * need to write out the log buffer.
+ */
+ if (LF_ISSET(DB_FLUSH | DB_LOG_WRNOSYNC)) {
+ if (!lock_held) {
+ LOG_SYSTEM_LOCK(env);
+ lock_held = 1;
+ }
+ if ((ret = __log_flush_commit(env, &lsn, flags)) != 0)
+ goto panic_check;
+ }
+
+ /*
+ * If flushed a checkpoint record, reset the "bytes since the last
+ * checkpoint" counters.
+ */
+ if (LF_ISSET(DB_LOG_CHKPNT))
+ lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
+
+ /* Increment count of records added to the log. */
+ STAT(++lp->stat.st_record);
+
+ if (0) {
+panic_check: /*
+ * Writing log records cannot fail if we're a replication
+ * master. The reason is that once we send the record to
+ * replication clients, the transaction can no longer
+ * abort, otherwise the master would be out of sync with
+ * the rest of the replication group. Panic the system.
+ *
+ * The enclosing if (0) means this block is entered only
+ * by way of goto panic_check, never by falling through.
+ */
+ if (ret != 0 && IS_REP_MASTER(env))
+ ret = __env_panic(env, ret);
+ }
+
+err: if (lock_held)
+ LOG_SYSTEM_UNLOCK(env);
+ if (need_free)
+ __os_free(env, dbt->data);
+
+ /*
+ * If auto-remove is set and we switched files, remove unnecessary
+ * log files.
+ */
+ if (ret == 0 && !IS_ZERO_LSN(old_lsn) && lp->db_log_autoremove)
+ __log_autoremove(env);
+
+ return (ret);
+}
+
+/*
+ * __log_current_lsn_int --
+ *	Worker for __log_current_lsn: report the LSN of the last log entry
+ *	and, optionally, the bytes logged since the last checkpoint.
+ *
+ * PUBLIC: int __log_current_lsn_int
+ * PUBLIC:     __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *));
+ */
+int
+__log_current_lsn_int(env, lsnp, mbytesp, bytesp)
+	ENV *env;
+	DB_LSN *lsnp;
+	u_int32_t *mbytesp, *bytesp;
+{
+	LOG *region;
+
+	region = env->lg_handle->reginfo.primary;
+
+	LOG_SYSTEM_LOCK(env);
+
+	/*
+	 * The last record written starts one record length before the
+	 * current log position.  When the current offset does not exceed
+	 * that length (for instance right after the header of a fresh log
+	 * file was written) there is nothing to back up over, so the
+	 * current position is reported unchanged.
+	 */
+	*lsnp = region->lsn;
+	if (region->len < region->lsn.offset)
+		lsnp->offset = region->lsn.offset - region->len;
+
+	/*
+	 * While the region lock is held, also hand back the amount logged
+	 * since the last checkpoint.  The buffer offset is added so bytes
+	 * still sitting in the log buffer are counted too.  Both outputs
+	 * are written only when mbytesp was supplied.
+	 */
+	if (mbytesp != NULL) {
+		*mbytesp = region->stat.st_wc_mbytes;
+		*bytesp =
+		    (u_int32_t)(region->stat.st_wc_bytes + region->b_off);
+	}
+
+	LOG_SYSTEM_UNLOCK(env);
+
+	return (0);
+}
+
+/*
+ * __log_current_lsn --
+ *	Return the current LSN; wraps __log_current_lsn_int in the
+ *	ENV_ENTER/ENV_LEAVE bracketing.
+ *
+ * PUBLIC: int __log_current_lsn
+ * PUBLIC:     __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *));
+ */
+int
+__log_current_lsn(env, lsnp, mbytesp, bytesp)
+	ENV *env;
+	DB_LSN *lsnp;
+	u_int32_t *mbytesp, *bytesp;
+{
+	DB_THREAD_INFO *thr;
+	int rc;
+
+	ENV_ENTER(env, thr);
+	rc = __log_current_lsn_int(env, lsnp, mbytesp, bytesp);
+	ENV_LEAVE(env, thr);
+
+	return (rc);
+}
+
+/*
+ * __log_put_next --
+ *	Put the given record as the next in the log, wherever that may
+ *	turn out to be.
+ *
+ *	lsn receives the LSN assigned to the record.  If writing the record
+ *	required a switch to a new log file, *old_lsnp is set to the value
+ *	lp->lsn had on entry; otherwise *old_lsnp is left untouched.
+ *
+ *	Returns 0 on success, EINVAL if the record (plus header and LOGP
+ *	record) is larger than a log file, or the error returned by
+ *	__log_newfile or __log_putr.
+ */
+static int
+__log_put_next(env, lsn, dbt, hdr, old_lsnp)
+	ENV *env;
+	DB_LSN *lsn;
+	const DBT *dbt;
+	HDR *hdr;
+	DB_LSN *old_lsnp;
+{
+	DB_LOG *dblp;
+	DB_LSN old_lsn;
+	LOG *lp;
+	int adv_file, newfile, ret;
+
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	/*
+	 * Save a copy of lp->lsn before we might decide to switch log
+	 * files and change it.  If we do switch log files, and we're
+	 * doing replication, we'll need to tell our clients about the
+	 * switch, and they need to receive a NEWFILE message
+	 * with this "would-be" LSN in order to know they're not
+	 * missing any log records.
+	 */
+	old_lsn = lp->lsn;
+	newfile = 0;
+	adv_file = 0;
+	/*
+	 * If our current log is at an older version and we want to write
+	 * a record then we need to advance the log.
+	 */
+	if (lp->persist.version != DB_LOGVERSION) {
+		__log_set_version(env, DB_LOGVERSION);
+		adv_file = 1;
+	}
+
+	/*
+	 * Switch files if the log version was just advanced, if the
+	 * current LSN offset is 0, or if this record won't fit in what
+	 * remains of the current file.
+	 */
+	if (adv_file || lp->lsn.offset == 0 ||
+	    lp->lsn.offset + hdr->size + dbt->size > lp->log_size) {
+		if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_size) {
+			/*
+			 * Cast the whole sum: sizeof makes it a size_t
+			 * expression, which is not what %lu expects on
+			 * platforms where size_t is wider than u_long.
+			 */
+			__db_errx(env, DB_STR_A("2513",
+	    "DB_ENV->log_put: record larger than maximum file size (%lu > %lu)",
+			    "%lu %lu"),
+			    (u_long)(hdr->size + sizeof(LOGP) + dbt->size),
+			    (u_long)lp->log_size);
+			return (EINVAL);
+		}
+
+		if ((ret = __log_newfile(dblp, NULL, 0, 0)) != 0)
+			return (ret);
+
+		/*
+		 * Flag that we switched files, in case we're a master
+		 * and need to send this information to our clients.
+		 * We postpone doing the actual send until we can
+		 * safely release the log region lock and are doing so
+		 * anyway.
+		 */
+		newfile = 1;
+	}
+
+	/* If we switched log files, let our caller know where. */
+	if (newfile)
+		*old_lsnp = old_lsn;
+
+	/* Actually put the record. */
+	return (__log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len, hdr));
+}
+
+/*
+ * __log_flush_commit --
+ * Flush a record.
+ *
+ * With DB_FLUSH set in flags, calls __log_flush_int for *lsnp; otherwise
+ * (the DB_LOG_WRNOSYNC case) writes out a non-empty log buffer unless the
+ * log is in-memory.
+ *
+ * If that fails and DB_LOG_COMMIT is set, and the commit record is not
+ * known to have been written already, a replication master panics. Any
+ * other environment has the commit record overwritten by way of
+ * __txn_force_abort, either in the log buffer or by reading it back from
+ * the log file, after which the original error is returned.
+ */
+static int
+__log_flush_commit(env, lsnp, flags)
+ ENV *env;
+ const DB_LSN *lsnp;
+ u_int32_t flags;
+{
+ DB_LOG *dblp;
+ DB_LSN flush_lsn;
+ HDR hdr;
+ LOG *lp;
+ int ret, t_ret;
+ size_t nr, nw;
+ u_int8_t *buffer;
+
+ dblp = env->lg_handle;
+ lp = dblp->reginfo.primary;
+ flush_lsn = *lsnp;
+
+ ret = 0;
+
+ /*
+ * DB_FLUSH:
+ * Flush a record for which the DB_FLUSH flag to log_put was set.
+ *
+ * DB_LOG_WRNOSYNC:
+ * If there's anything in the current log buffer, write it out.
+ */
+ if (LF_ISSET(DB_FLUSH))
+ ret = __log_flush_int(dblp, &flush_lsn, 1);
+ else if (!lp->db_log_inmemory && lp->b_off != 0)
+ if ((ret = __log_write(dblp,
+ dblp->bufp, (u_int32_t)lp->b_off)) == 0)
+ lp->b_off = 0;
+
+ /*
+ * If a flush supporting a transaction commit fails, we must abort the
+ * transaction. (If we aren't doing a commit, return the failure; if
+ * the commit we care about made it to disk successfully, we just
+ * ignore the failure, because there's no way to undo the commit.)
+ */
+ if (ret == 0 || !LF_ISSET(DB_LOG_COMMIT))
+ return (ret);
+
+ if (LF_ISSET(DB_FLUSH) ?
+ flush_lsn.file != lp->s_lsn.file ||
+ flush_lsn.offset < lp->s_lsn.offset :
+ flush_lsn.file != lp->lsn.file || flush_lsn.offset < lp->w_off)
+ return (0);
+
+ if (IS_REP_MASTER(env)) {
+ __db_err(env, ret, DB_STR("2514",
+ "Write failed on MASTER commit."));
+ return (__env_panic(env, ret));
+ }
+
+ /*
+ * Else, make sure that the commit record does not get out after we
+ * abort the transaction. Do this by overwriting the commit record
+ * in the buffer. (Note that other commits in this buffer will wait
+ * until a successful write happens, we do not wake them.) We point
+ * at the right part of the buffer and write an abort record over the
+ * commit. We must then try and flush the buffer again, since the
+ * interesting part of the buffer may have actually made it out to
+ * disk before there was a failure, we can't know for sure.
+ */
+ if (flush_lsn.offset > lp->w_off) {
+ if ((t_ret = __txn_force_abort(env,
+ dblp->bufp + flush_lsn.offset - lp->w_off)) != 0)
+ return (__env_panic(env, t_ret));
+ } else {
+ /*
+ * The buffer was written, but it is not on disk, we
+ * must read it back and force things from a commit
+ * state to an abort state. Lots of things could fail
+ * here and we will be left with a commit record but
+ * a panic return.
+ *
+ * NOTE(review): if a step after __os_malloc fails, the
+ * panic return below is taken without freeing buffer.
+ */
+ if (
+ (t_ret = __os_seek(env,
+ dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 ||
+ (t_ret = __os_read(env, dblp->lfhp, &hdr,
+ HDR_NORMAL_SZ, &nr)) != 0 || nr != HDR_NORMAL_SZ)
+ return (__env_panic(env, t_ret == 0 ? EIO : t_ret));
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(&hdr, CRYPTO_ON(env));
+ if ((t_ret = __os_malloc(env, hdr.len, &buffer)) != 0 ||
+ (t_ret = __os_seek(env,
+ dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 ||
+ (t_ret = __os_read(env, dblp->lfhp, buffer,
+ hdr.len, &nr)) != 0 || nr != hdr.len ||
+ (t_ret = __txn_force_abort(env, buffer)) != 0 ||
+ (t_ret = __os_seek(env,
+ dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 ||
+ (t_ret = __os_write(env, dblp->lfhp, buffer,
+ nr, &nw)) != 0 || nw != nr)
+ return (__env_panic(env, t_ret == 0 ? EIO : t_ret));
+ __os_free(env, buffer);
+ }
+ /*
+ * Try to flush the log again, if the disk just bounced then we
+ * want to be sure it does not go away again before we write the
+ * abort record.
+ */
+ (void)__log_flush_int(dblp, &flush_lsn, 0);
+
+ return (ret);
+}
+
+/*
+ * __log_newfile --
+ * Initialize and switch to a new log file. (Note that this is
+ * called both when no log yet exists and when we fill a log file.)
+ *
+ * If logfile is 0 and the current LSN offset is non-zero, the log is
+ * flushed and the current LSN is advanced to offset 0 of the next file
+ * number. If logfile is non-zero, the current LSN is reset to offset 0
+ * of that file number instead.
+ *
+ * If version is non-zero the log version is set to it before the file's
+ * first record is written. That first record is a copy of lp->persist,
+ * written with __log_putr. If lsnp is non-NULL it receives lp->lsn as it
+ * stands after that record has been written.
+ *
+ * Returns 0 on success or the first error encountered.
+ *
+ * PUBLIC: int __log_newfile __P((DB_LOG *, DB_LSN *, u_int32_t, u_int32_t));
+ */
+int
+__log_newfile(dblp, lsnp, logfile, version)
+ DB_LOG *dblp;
+ DB_LSN *lsnp;
+ u_int32_t logfile;
+ u_int32_t version;
+{
+ DBT t;
+ DB_CIPHER *db_cipher;
+ DB_LSN lsn;
+ ENV *env;
+ HDR hdr;
+ LOG *lp;
+ LOGP *tpersist;
+ int need_free, ret;
+ u_int32_t lastoff;
+ size_t tsize;
+
+ env = dblp->env;
+ lp = dblp->reginfo.primary;
+
+ /*
+ * If we're not specifying a specific log file number and we're
+ * not at the beginning of a file already, start a new one.
+ */
+ if (logfile == 0 && lp->lsn.offset != 0) {
+ /*
+ * Flush the log so this file is out and can be closed. We
+ * cannot release the region lock here because we need to
+ * protect the end of the file while we switch. In
+ * particular, a thread with a smaller record than ours
+ * could detect that there is space in the log. Even
+ * blocking that event by declaring the file full would
+ * require all threads to wait here so that the lsn.file
+ * can be moved ahead after the flush completes. This
+ * probably can be changed if we had an lsn for the
+ * previous file and one for the current, but it does not
+ * seem like this would get much more throughput, if any.
+ */
+ if ((ret = __log_flush_int(dblp, NULL, 0)) != 0)
+ return (ret);
+
+ /*
+ * Save the last known offset from the previous file, we'll
+ * need it to initialize the persistent header information.
+ */
+ lastoff = lp->lsn.offset;
+
+ /* Point the current LSN to the new file. */
+ ++lp->lsn.file;
+ lp->lsn.offset = 0;
+
+ /* Reset the file write offset. */
+ lp->w_off = 0;
+ } else
+ lastoff = 0;
+
+ /*
+ * Replication may require we reset the log file name space entirely.
+ * In that case we also force a file switch so that replication can
+ * clean up old files.
+ */
+ if (logfile != 0) {
+ lp->lsn.file = logfile;
+ lp->lsn.offset = 0;
+ lp->w_off = 0;
+ if (lp->db_log_inmemory) {
+ lsn = lp->lsn;
+ (void)__log_zero(env, &lsn);
+ } else {
+ lp->s_lsn = lp->lsn;
+ if ((ret = __log_newfh(dblp, 1)) != 0)
+ return (ret);
+ }
+ }
+
+ DB_ASSERT(env, lp->db_log_inmemory || lp->b_off == 0);
+ if (lp->db_log_inmemory &&
+ (ret = __log_inmem_newfile(dblp, lp->lsn.file)) != 0)
+ return (ret);
+
+ /*
+ * Insert persistent information as the first record in every file.
+ * Note that the previous length is wrong for the very first record
+ * of the log, but that's okay, we check for it during retrieval.
+ */
+ memset(&t, 0, sizeof(t));
+ memset(&hdr, 0, sizeof(HDR));
+
+ need_free = 0;
+ tsize = sizeof(LOGP);
+ db_cipher = env->crypto_handle;
+ if (CRYPTO_ON(env))
+ tsize += db_cipher->adj_size(tsize);
+ if ((ret = __os_calloc(env, 1, tsize, &tpersist)) != 0)
+ return (ret);
+ need_free = 1;
+ /*
+ * If we're told what version to make this file, then we
+ * need to be at that version. Update here.
+ */
+ if (version != 0) {
+ __log_set_version(env, version);
+ if ((ret = __env_init_rec(env, version)) != 0)
+ goto err;
+ }
+ lp->persist.log_size = lp->log_size = lp->log_nsize;
+ memcpy(tpersist, &lp->persist, sizeof(LOGP));
+ DB_SET_DBT(t, tpersist, tsize);
+ if (LOG_SWAPPED(env))
+ __log_persistswap(tpersist);
+
+ if ((ret =
+ __log_encrypt_record(env, &t, &hdr, (u_int32_t)tsize)) != 0)
+ goto err;
+
+ if ((ret = __log_putr(dblp, &lsn,
+ &t, lastoff == 0 ? 0 : lastoff - lp->len, &hdr)) != 0)
+ goto err;
+
+ /* Update the LSN information returned to the caller. */
+ if (lsnp != NULL)
+ *lsnp = lp->lsn;
+
+err: if (need_free)
+ __os_free(env, tpersist);
+ return (ret);
+}
+
+/*
+ * __log_putr --
+ * Actually put a record into the log.
+ *
+ * Sets hdr->prev to prev and hdr->len to the header size plus dbt->size,
+ * completes the checksum (under HAVE_LOG_CHECKSUM), stores the LSN at
+ * which the record is placed in *lsn, and copies the header followed by
+ * dbt->data into the log with __log_fill. If h is NULL a zeroed local
+ * header is used.
+ *
+ * On success lp->len is set to the record length and lp->lsn.offset is
+ * advanced by it. On failure lp->w_off, lp->b_off and lp->f_lsn are put
+ * back to the values they had on entry and the error is returned; if the
+ * buffer contents cannot be restored the environment is panicked.
+ */
+static int
+__log_putr(dblp, lsn, dbt, prev, h)
+ DB_LOG *dblp;
+ DB_LSN *lsn;
+ const DBT *dbt;
+ u_int32_t prev;
+ HDR *h;
+{
+ DB_CIPHER *db_cipher;
+ DB_LSN f_lsn;
+ ENV *env;
+ HDR tmp, *hdr;
+ LOG *lp;
+ int ret, t_ret;
+ db_size_t b_off;
+ size_t nr;
+ u_int32_t w_off;
+
+ env = dblp->env;
+ lp = dblp->reginfo.primary;
+
+ /*
+ * If we weren't given a header, use a local one.
+ */
+ db_cipher = env->crypto_handle;
+ if (h == NULL) {
+ hdr = &tmp;
+ memset(hdr, 0, sizeof(HDR));
+ if (CRYPTO_ON(env))
+ hdr->size = HDR_CRYPTO_SZ;
+ else
+ hdr->size = HDR_NORMAL_SZ;
+ } else
+ hdr = h;
+
+ /* Save our position in case we fail. */
+ b_off = lp->b_off;
+ w_off = lp->w_off;
+ f_lsn = lp->f_lsn;
+
+ /*
+ * Initialize the header. If we just switched files, lsn.offset will
+ * be 0, and what we really want is the offset of the previous record
+ * in the previous file. Fortunately, prev holds the value we want.
+ */
+ hdr->prev = prev;
+ hdr->len = (u_int32_t)hdr->size + dbt->size;
+
+#ifdef HAVE_LOG_CHECKSUM
+ /*
+ * If we were passed in a nonzero checksum, our caller calculated
+ * the checksum before acquiring the log mutex, as an optimization.
+ *
+ * If our caller calculated a real checksum of 0, we'll needlessly
+ * recalculate it. C'est la vie; there's no out-of-bounds value
+ * here.
+ */
+ if (hdr->chksum[0] == 0) {
+ if (lp->persist.version < DB_LOGCHKSUM)
+ __db_chksum(NULL, dbt->data, dbt->size,
+ (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL,
+ hdr->chksum);
+ else
+ __db_chksum(hdr, dbt->data, dbt->size,
+ (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL,
+ hdr->chksum);
+ } else if (lp->persist.version >= DB_LOGCHKSUM)
+ /*
+ * We need to include hdr->prev and len here, since they were
+ * still zero at the time of the caller's __db_chksum() call.
+ */
+ LOG_HDR_SUM(CRYPTO_ON(env), hdr, hdr->chksum);
+#endif
+
+ if (lp->db_log_inmemory && (ret = __log_inmem_chkspace(dblp,
+ (u_int32_t)hdr->size + dbt->size)) != 0)
+ goto err;
+
+ /*
+ * The offset into the log file at this point is the LSN where
+ * we're about to put this record, and is the LSN the caller wants.
+ */
+ *lsn = lp->lsn;
+
+ nr = hdr->size;
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(hdr, CRYPTO_ON(env));
+
+ /* nr can't overflow a 32 bit value - header size is internal. */
+ ret = __log_fill(dblp, lsn, hdr, (u_int32_t)nr);
+
+ if (LOG_SWAPPED(env))
+ __log_hdrswap(hdr, CRYPTO_ON(env));
+
+ if (ret != 0)
+ goto err;
+
+ if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
+ goto err;
+
+ lp->len = (u_int32_t)(hdr->size + dbt->size);
+ lp->lsn.offset += lp->len;
+ return (0);
+err:
+ /*
+ * If we wrote more than one buffer before failing, get the
+ * first one back. The extra buffers will fail the checksums
+ * and be ignored.
+ */
+ if (w_off + lp->buffer_size < lp->w_off) {
+ DB_ASSERT(env, !lp->db_log_inmemory);
+ if ((t_ret = __os_seek(env, dblp->lfhp, 0, 0, w_off)) != 0 ||
+ (t_ret = __os_read(env, dblp->lfhp, dblp->bufp,
+ b_off, &nr)) != 0)
+ return (__env_panic(env, t_ret));
+ if (nr != b_off) {
+ __db_errx(env, DB_STR("2515",
+ "Short read while restoring log"));
+ return (__env_panic(env, EIO));
+ }
+ }
+
+ /* Reset to where we started. */
+ lp->w_off = w_off;
+ lp->b_off = b_off;
+ lp->f_lsn = f_lsn;
+
+ return (ret);
+}
+
+/*
+ * __log_flush_pp --
+ *	Configuration check and thread/replication bracketing for
+ *	DB_ENV->log_flush; the flush itself is done by __log_flush.
+ *
+ * PUBLIC: int __log_flush_pp __P((DB_ENV *, const DB_LSN *));
+ */
+int
+__log_flush_pp(dbenv, lsn)
+	DB_ENV *dbenv;
+	const DB_LSN *lsn;
+{
+	DB_THREAD_INFO *thr;
+	ENV *env;
+	int rc;
+
+	env = dbenv->env;
+
+	/* The logging subsystem must be configured in this environment. */
+	ENV_REQUIRES_CONFIG(env,
+	    env->lg_handle, "DB_ENV->log_flush", DB_INIT_LOG);
+
+	ENV_ENTER(env, thr);
+	REPLICATION_WRAP(env, (__log_flush(env, lsn)), 0, rc);
+	ENV_LEAVE(env, thr);
+	return (rc);
+}
+
+/*
+ * See if we need to wait. s_lsn is not locked so some care is needed.
+ * The sync point can only move forward. The lsnp->file cannot be
+ * greater than the s_lsn.file. If the file we want is in the past
+ * we are done. If the file numbers are the same check the offset.
+ * This all assumes we can read a 32-bit quantity in one state or
+ * the other, not in transition.
+ *
+ * Evaluates to non-zero when lp->s_lsn is strictly greater than *lsnp.
+ * Both arguments are evaluated more than once.
+ */
+#define ALREADY_FLUSHED(lp, lsnp) \
+ (((lp)->s_lsn.file > (lsnp)->file) || \
+ ((lp)->s_lsn.file == (lsnp)->file && \
+ (lp)->s_lsn.offset > (lsnp)->offset))
+
+/*
+ * __log_flush --
+ *	ENV->log_flush: flush the log through lsn, or the whole log when
+ *	lsn is NULL.
+ *
+ * PUBLIC: int __log_flush __P((ENV *, const DB_LSN *));
+ */
+int
+__log_flush(env, lsn)
+	ENV *env;
+	const DB_LSN *lsn;
+{
+	DB_LOG *logh;
+	LOG *region;
+	int rc;
+
+	logh = env->lg_handle;
+	region = logh->reginfo.primary;
+
+	/* Unlocked check: nothing to do if the sync point is past lsn. */
+	if (lsn != NULL && ALREADY_FLUSHED(region, lsn))
+		return (0);
+
+	/* Otherwise flush while holding the log region lock. */
+	LOG_SYSTEM_LOCK(env);
+	rc = __log_flush_int(logh, lsn, 1);
+	LOG_SYSTEM_UNLOCK(env);
+
+	return (rc);
+}
+
+/*
+ * __log_flush_int --
+ * Write all records less than or equal to the specified LSN; internal
+ * version.
+ *
+ * A NULL lsnp means flush through the last record written. An LSN past
+ * the current end of the log panics the environment. For an in-memory
+ * log only s_lsn is advanced to the current LSN.
+ *
+ * The callers visible in this file invoke this function with the log
+ * region lock held. If release is non-zero that lock is dropped while
+ * waiting for a flush that is already in progress and during the fsync,
+ * and is reacquired before returning.
+ *
+ * After the sync, threads queued on lp->commits whose LSN is now below
+ * s_lsn are woken; the first one that is not yet covered is woken with
+ * DB_COMMIT_FLUSH set so that it performs the next flush.
+ *
+ * PUBLIC: int __log_flush_int __P((DB_LOG *, const DB_LSN *, int));
+ */
+int
+__log_flush_int(dblp, lsnp, release)
+ DB_LOG *dblp;
+ const DB_LSN *lsnp;
+ int release;
+{
+ struct __db_commit *commit;
+ ENV *env;
+ DB_LSN flush_lsn, f_lsn;
+ LOG *lp;
+ size_t b_off;
+ u_int32_t ncommit, w_off;
+ int do_flush, first, ret;
+
+ env = dblp->env;
+ lp = dblp->reginfo.primary;
+ ncommit = 0;
+ ret = 0;
+
+ if (lp->db_log_inmemory) {
+ lp->s_lsn = lp->lsn;
+ STAT(++lp->stat.st_scount);
+ return (0);
+ }
+
+ /*
+ * If no LSN specified, flush the entire log by setting the flush LSN
+ * to the last LSN written in the log. Otherwise, check that the LSN
+ * isn't a non-existent record for the log.
+ */
+ if (lsnp == NULL) {
+ flush_lsn.file = lp->lsn.file;
+ flush_lsn.offset = lp->lsn.offset - lp->len;
+ } else if (lsnp->file > lp->lsn.file ||
+ (lsnp->file == lp->lsn.file &&
+ lsnp->offset > lp->lsn.offset - lp->len)) {
+ __db_errx(env, DB_STR_A("2516",
+ "DB_ENV->log_flush: LSN of %lu/%lu past current end-of-log of %lu/%lu",
+ "%lu %lu %lu %lu"), (u_long)lsnp->file,
+ (u_long)lsnp->offset, (u_long)lp->lsn.file,
+ (u_long)lp->lsn.offset);
+ __db_errx(env, DB_STR("2517",
+ "Database environment corrupt; the wrong log files may "
+ "have been removed or incompatible database files "
+ "imported from another environment"));
+ return (__env_panic(env, DB_RUNRECOVERY));
+ } else {
+ if (ALREADY_FLUSHED(lp, lsnp))
+ return (0);
+ flush_lsn = *lsnp;
+ }
+
+ /*
+ * If a flush is in progress and we're allowed to do so, drop
+ * the region lock and block waiting for the next flush.
+ */
+ if (release && lp->in_flush != 0) {
+ if ((commit = SH_TAILQ_FIRST(
+ &lp->free_commits, __db_commit)) == NULL) {
+ if ((ret = __env_alloc(&dblp->reginfo,
+ sizeof(struct __db_commit), &commit)) != 0)
+ goto flush;
+ memset(commit, 0, sizeof(*commit));
+ if ((ret = __mutex_alloc(env, MTX_TXN_COMMIT,
+ DB_MUTEX_SELF_BLOCK, &commit->mtx_txnwait)) != 0) {
+ __env_alloc_free(&dblp->reginfo, commit);
+ return (ret);
+ }
+ MUTEX_LOCK(env, commit->mtx_txnwait);
+ } else
+ SH_TAILQ_REMOVE(
+ &lp->free_commits, commit, links, __db_commit);
+
+ lp->ncommit++;
+
+ /*
+ * Flushes may be requested out of LSN order; be
+ * sure we only move lp->t_lsn forward.
+ */
+ if (LOG_COMPARE(&lp->t_lsn, &flush_lsn) < 0)
+ lp->t_lsn = flush_lsn;
+
+ commit->lsn = flush_lsn;
+ SH_TAILQ_INSERT_HEAD(
+ &lp->commits, commit, links, __db_commit);
+ LOG_SYSTEM_UNLOCK(env);
+ /* Wait here for the in-progress flush to finish. */
+ MUTEX_LOCK(env, commit->mtx_txnwait);
+ LOG_SYSTEM_LOCK(env);
+
+ lp->ncommit--;
+ /*
+ * Grab the flag before freeing the struct to see if
+ * we need to flush the log to commit. If so,
+ * use the maximal lsn for any committing thread.
+ */
+ do_flush = F_ISSET(commit, DB_COMMIT_FLUSH);
+ F_CLR(commit, DB_COMMIT_FLUSH);
+ SH_TAILQ_INSERT_HEAD(
+ &lp->free_commits, commit, links, __db_commit);
+ if (do_flush) {
+ lp->in_flush--;
+ flush_lsn = lp->t_lsn;
+ } else
+ return (0);
+ }
+
+ /*
+ * Protect flushing with its own mutex so we can release
+ * the region lock except during file switches.
+ */
+flush: MUTEX_LOCK(env, lp->mtx_flush);
+
+ /*
+ * If the LSN is less than or equal to the last-sync'd LSN, we're done.
+ * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte
+ * after the byte we absolutely know was written to disk, so the test
+ * is <, not <=.
+ */
+ if (flush_lsn.file < lp->s_lsn.file ||
+ (flush_lsn.file == lp->s_lsn.file &&
+ flush_lsn.offset < lp->s_lsn.offset)) {
+ MUTEX_UNLOCK(env, lp->mtx_flush);
+ goto done;
+ }
+
+ /*
+ * We may need to write the current buffer. We have to write the
+ * current buffer if the flush LSN is greater than or equal to the
+ * buffer's starting LSN.
+ *
+ * Otherwise, it's still possible that this thread may never have
+ * written to this log file. Acquire a file descriptor if we don't
+ * already have one.
+ */
+ if (lp->b_off != 0 && LOG_COMPARE(&flush_lsn, &lp->f_lsn) >= 0) {
+ if ((ret = __log_write(dblp,
+ dblp->bufp, (u_int32_t)lp->b_off)) != 0) {
+ MUTEX_UNLOCK(env, lp->mtx_flush);
+ goto done;
+ }
+
+ lp->b_off = 0;
+ } else if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file)
+ if ((ret = __log_newfh(dblp, 0)) != 0) {
+ MUTEX_UNLOCK(env, lp->mtx_flush);
+ goto done;
+ }
+
+ /*
+ * We are going to flush, release the region.
+ * First get the current state of the buffer since
+ * another write may come in, but we may not flush it.
+ */
+ b_off = lp->b_off;
+ w_off = lp->w_off;
+ f_lsn = lp->f_lsn;
+ lp->in_flush++;
+ if (release)
+ LOG_SYSTEM_UNLOCK(env);
+
+ /* Sync all writes to disk. */
+ if ((ret = __os_fsync(env, dblp->lfhp)) != 0) {
+ MUTEX_UNLOCK(env, lp->mtx_flush);
+ if (release)
+ LOG_SYSTEM_LOCK(env);
+ lp->in_flush--;
+ goto done;
+ }
+
+ /*
+ * Set the last-synced LSN.
+ * This value must be set to the LSN past the last complete
+ * record that has been flushed. This is at least the first
+ * lsn, f_lsn. If the buffer is empty, b_off == 0, then
+ * we can move up to write point since the first lsn is not
+ * set for the new buffer.
+ */
+ lp->s_lsn = f_lsn;
+ if (b_off == 0)
+ lp->s_lsn.offset = w_off;
+
+ MUTEX_UNLOCK(env, lp->mtx_flush);
+ if (release)
+ LOG_SYSTEM_LOCK(env);
+
+ lp->in_flush--;
+ STAT(++lp->stat.st_scount);
+
+ /*
+ * How many flush calls (usually commits) did this call actually sync?
+ * At least one, if it got here.
+ */
+ ncommit = 1;
+done:
+ if (lp->ncommit != 0) {
+ first = 1;
+ SH_TAILQ_FOREACH(commit, &lp->commits, links, __db_commit)
+ if (LOG_COMPARE(&lp->s_lsn, &commit->lsn) > 0) {
+ MUTEX_UNLOCK(env, commit->mtx_txnwait);
+ SH_TAILQ_REMOVE(
+ &lp->commits, commit, links, __db_commit);
+ ncommit++;
+ } else if (first == 1) {
+ F_SET(commit, DB_COMMIT_FLUSH);
+ MUTEX_UNLOCK(env, commit->mtx_txnwait);
+ SH_TAILQ_REMOVE(
+ &lp->commits, commit, links, __db_commit);
+ /*
+ * This thread will wake and flush.
+ * If another thread commits and flushes
+ * first we will waste a trip through the
+ * mutex.
+ */
+ lp->in_flush++;
+ first = 0;
+ }
+ }
+#ifdef HAVE_STATISTICS
+ if (lp->stat.st_maxcommitperflush < ncommit)
+ lp->stat.st_maxcommitperflush = ncommit;
+ if (lp->stat.st_mincommitperflush > ncommit ||
+ lp->stat.st_mincommitperflush == 0)
+ lp->stat.st_mincommitperflush = ncommit;
+#endif
+
+ return (ret);
+}
+
+/*
+ * __log_fill --
+ *	Copy a run of bytes into the log, spilling to disk as buffers fill.
+ *
+ *	For in-memory logs the bytes are copied into the ring buffer and the
+ *	buffer offset is advanced modulo the buffer size.  Otherwise whole
+ *	buffer-sized multiples are written straight from the caller's memory
+ *	when the log buffer is empty, and any remainder is staged in the log
+ *	buffer, which is written out each time it becomes full.
+ *
+ *	Returns 0, or the error reported by __log_write.
+ */
+static int
+__log_fill(dblp, lsn, addr, len)
+	DB_LOG *dblp;
+	DB_LSN *lsn;
+	void *addr;
+	u_int32_t len;
+{
+	LOG *lp;
+	u_int32_t bsize, direct;
+	size_t chunk, space;
+	int ret;
+
+	lp = dblp->reginfo.primary;
+	bsize = lp->buffer_size;
+
+	/* In-memory logs never touch the file system. */
+	if (lp->db_log_inmemory) {
+		__log_inmem_copyin(dblp, lp->b_off, addr, len);
+		lp->b_off = (lp->b_off + len) % bsize;
+		return (0);
+	}
+
+	for (; len != 0;) {
+		if (lp->b_off == 0) {
+			/*
+			 * Starting a fresh buffer: remember which LSN its
+			 * first byte belongs to, the flush code needs it.
+			 */
+			lp->f_lsn = *lsn;
+
+			/*
+			 * Empty buffer and at least one buffer's worth of
+			 * data: bypass the buffer and write directly.
+			 */
+			if (len >= bsize) {
+				direct = (len / bsize) * bsize;
+				ret = __log_write(dblp, addr, direct);
+				if (ret != 0)
+					return (ret);
+				addr = (u_int8_t *)addr + direct;
+				len -= direct;
+				STAT(++lp->stat.st_wcount_fill);
+				continue;
+			}
+		}
+
+		/* Stage as much as fits in what is left of the buffer. */
+		space = bsize - lp->b_off;
+		chunk = len;
+		if (space < len)
+			chunk = space;
+		memcpy(dblp->bufp + lp->b_off, addr, chunk);
+		lp->b_off += (u_int32_t)chunk;
+		len -= (u_int32_t)chunk;
+		addr = (u_int8_t *)addr + chunk;
+
+		/* A full buffer goes to disk and is then reused. */
+		if (lp->b_off != bsize)
+			continue;
+		ret = __log_write(dblp, dblp->bufp, bsize);
+		if (ret != 0)
+			return (ret);
+		lp->b_off = 0;
+		STAT(++lp->stat.st_wcount_fill);
+	}
+	return (0);
+}
+
+/*
+ * __log_write --
+ * Write the log buffer to disk.
+ *
+ * Writes len bytes from addr to the current log file at offset lp->w_off
+ * and then advances lp->w_off by len. Must not be used with in-memory
+ * logs (asserted below). Returns 0, or the error from __log_newfh or
+ * __os_io. The byte count __os_io stores in nw is not examined here.
+ */
+static int
+__log_write(dblp, addr, len)
+ DB_LOG *dblp;
+ void *addr;
+ u_int32_t len;
+{
+ ENV *env;
+ LOG *lp;
+ size_t nw;
+ int ret;
+
+ env = dblp->env;
+ lp = dblp->reginfo.primary;
+
+ DB_ASSERT(env, !lp->db_log_inmemory);
+
+ /*
+ * If we haven't opened the log file yet or the current one has
+ * changed, acquire a new log file. We are creating the file if we're
+ * about to write to the start of it, in other words, if the write
+ * offset is zero.
+ */
+ if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file ||
+ dblp->lf_timestamp != lp->timestamp)
+ if ((ret = __log_newfh(dblp, lp->w_off == 0)) != 0)
+ return (ret);
+
+ /*
+ * If we're writing the first block in a log file on a filesystem that
+ * guarantees unwritten blocks are zero-filled, we set the size of the
+ * file in advance. This increases sync performance on some systems,
+ * because they don't need to update metadata on every sync.
+ *
+ * Ignore any error -- we may have run out of disk space, but that's no
+ * reason to quit.
+ *
+ * Both preprocessor branches below open the same block; it is closed
+ * once, after the optional zero-fill.
+ */
+#ifdef HAVE_FILESYSTEM_NOTZERO
+ if (lp->w_off == 0 && !__os_fs_notzero()) {
+#else
+ if (lp->w_off == 0) {
+#endif
+ (void)__db_file_extend(env, dblp->lfhp, lp->log_size);
+ if (F_ISSET(dblp, DBLOG_ZERO))
+ (void)__db_zero_extend(env, dblp->lfhp,
+ 0, lp->log_size/lp->buffer_size, lp->buffer_size);
+
+ }
+
+ /*
+ * Seek to the offset in the file (someone may have written it
+ * since we last did).
+ */
+ if ((ret = __os_io(env, DB_IO_WRITE,
+ dblp->lfhp, 0, 0, lp->w_off, len, addr, &nw)) != 0)
+ return (ret);
+
+ /*
+ * Advance the file write offset past the bytes just written. The
+ * buffer offset (lp->b_off) is not touched here.
+ */
+ lp->w_off += len;
+
+ /*
+ * Update written statistics. The bytes-since-checkpoint counters
+ * (st_wc_bytes, st_wc_mbytes) are maintained even when
+ * HAVE_STATISTICS is not defined; the remaining counters are not.
+ */
+ if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
+ lp->stat.st_wc_bytes -= MEGABYTE;
+ ++lp->stat.st_wc_mbytes;
+ }
+#ifdef HAVE_STATISTICS
+ if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
+ lp->stat.st_w_bytes -= MEGABYTE;
+ ++lp->stat.st_w_mbytes;
+ }
+ ++lp->stat.st_wcount;
+#endif
+
+ return (0);
+}
+
+/*
+ * __log_file_pp --
+ *	ENV->log_file pre/post processing.
+ *
+ *	Verifies that logging is configured, rejects the call with EINVAL
+ *	when the log is kept in memory, and otherwise runs __log_file inside
+ *	the usual environment-enter / replication wrapper.
+ *
+ * PUBLIC: int __log_file_pp __P((DB_ENV *, const DB_LSN *, char *, size_t));
+ */
+int
+__log_file_pp(dbenv, lsn, namep, len)
+	DB_ENV *dbenv;
+	const DB_LSN *lsn;
+	char *namep;
+	size_t len;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int inmem, ret;
+
+	env = dbenv->env;
+
+	ENV_REQUIRES_CONFIG(env,
+	    env->lg_handle, "DB_ENV->log_file", DB_INIT_LOG);
+
+	/* There are no log files to name when logs live in memory. */
+	ret = __log_get_config(dbenv, DB_LOG_IN_MEMORY, &inmem);
+	if (ret != 0)
+		return (ret);
+	if (inmem != 0) {
+		__db_errx(env, DB_STR("2518",
+		    "DB_ENV->log_file is illegal with in-memory logs"));
+		return (EINVAL);
+	}
+
+	ENV_ENTER(env, ip);
+	REPLICATION_WRAP(env, (__log_file(env, lsn, namep, len)), 0, ret);
+	ENV_LEAVE(env, ip);
+
+	return (ret);
+}
+
+/*
+ * __log_file --
+ *	ENV->log_file.
+ *
+ *	Build the path of the log file that holds lsn and copy it, including
+ *	the terminating nul, into the caller's buffer namep of len bytes.
+ *
+ *	Returns 0 on success, the error from __log_name if the name cannot be
+ *	built, or EINVAL if the buffer is too short.  In the too-short case
+ *	namep is set to the empty string when it has room for at least one
+ *	byte.
+ */
+static int
+__log_file(env, lsn, namep, len)
+	ENV *env;
+	const DB_LSN *lsn;
+	char *namep;
+	size_t len;
+{
+	DB_LOG *dblp;
+	size_t need;
+	int ret;
+	char *name;
+
+	dblp = env->lg_handle;
+	LOG_SYSTEM_LOCK(env);
+	ret = __log_name(dblp, lsn->file, &name, NULL, 0);
+	LOG_SYSTEM_UNLOCK(env);
+	if (ret != 0)
+		return (ret);
+
+	/* Check to make sure there's enough room and copy the name. */
+	need = strlen(name) + 1;
+	if (len < need) {
+		/* Never store through namep if the buffer has no bytes. */
+		if (len > 0)
+			*namep = '\0';
+		/* The name was allocated for us; release it on this path too. */
+		__os_free(env, name);
+		__db_errx(env, DB_STR("2519",
+		    "DB_ENV->log_file: name buffer is too short"));
+		return (EINVAL);
+	}
+	memcpy(namep, name, need);
+	__os_free(env, name);
+
+	return (0);
+}
+
+/*
+ * __log_newfh --
+ *	Acquire a file handle for the current log file.
+ *
+ *	Any handle already held is closed first.  The file named by the
+ *	region's current LSN is then opened through __log_valid; create
+ *	selects whether the file may be created.  Returns 0, the error from
+ *	__log_valid, or DB_NOTFOUND when the file's validity status is not
+ *	one of the acceptable ones.
+ */
+static int
+__log_newfh(dblp, create)
+	DB_LOG *dblp;
+	int create;
+{
+	ENV *env;
+	LOG *lp;
+	u_int32_t flags;
+	int ret;
+	logfile_validity status;
+
+	env = dblp->env;
+	lp = dblp->reginfo.primary;
+
+	/* Drop whatever handle we were holding. */
+	if (dblp->lfhp != NULL) {
+		(void)__os_closehandle(env, dblp->lfhp);
+		dblp->lfhp = NULL;
+	}
+
+	/* Assemble the open flags one option at a time. */
+	flags = DB_OSO_SEQ;
+	if (create)
+		flags |= DB_OSO_CREATE;
+	if (F_ISSET(dblp, DBLOG_DIRECT))
+		flags |= DB_OSO_DIRECT;
+	if (F_ISSET(dblp, DBLOG_DSYNC))
+		flags |= DB_OSO_DSYNC;
+
+	/* Record which file we are on, then open and validate it. */
+	dblp->lfname = lp->lsn.file;
+	ret = __log_valid(dblp,
+	    dblp->lfname, 0, &dblp->lfhp, flags, &status, NULL);
+	if (ret != 0) {
+		__db_err(env, ret,
+		    "DB_ENV->log_newfh: %lu", (u_long)lp->lsn.file);
+		return (ret);
+	}
+
+	if (!(status == DB_LV_NORMAL || status == DB_LV_INCOMPLETE ||
+	    status == DB_LV_OLD_READABLE))
+		return (DB_NOTFOUND);
+
+	return (0);
+}
+
+/*
+ * __log_name --
+ *	Return the log name for a particular file, and optionally open it.
+ *
+ *	On return *namep holds an allocated path.  When fhpp is NULL the
+ *	file is not opened and only the new-style name is produced.  When
+ *	fhpp is non-NULL the new-style file is opened with flags; if it does
+ *	not exist and DB_OSO_RDONLY is set, the old-style name is tried and,
+ *	if that opens, returned in *namep instead.
+ *
+ *	Returns 0 on success, the error from __db_appname or __os_open, or
+ *	the result of __env_panic when the new-style open fails for a reason
+ *	other than ENOENT or the caller was not opening read-only.
+ *
+ * PUBLIC: int __log_name __P((DB_LOG *,
+ * PUBLIC:     u_int32_t, char **, DB_FH **, u_int32_t));
+ */
+int
+__log_name(dblp, filenumber, namep, fhpp, flags)
+	DB_LOG *dblp;
+	u_int32_t filenumber, flags;
+	char **namep;
+	DB_FH **fhpp;
+{
+	ENV *env;
+	LOG *lp;
+	int mode, ret;
+	char *oname;
+	char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20];
+
+	env = dblp->env;
+	lp = dblp->reginfo.primary;
+
+	/*
+	 * The cleanup path frees oname; start it out NULL so that path never
+	 * looks at an indeterminate pointer if __db_appname fails without
+	 * storing through it.
+	 */
+	oname = NULL;
+
+	DB_ASSERT(env, !lp->db_log_inmemory);
+
+	/*
+	 * !!!
+	 * The semantics of this routine are bizarre.
+	 *
+	 * The reason for all of this is that we need a place where we can
+	 * intercept requests for log files, and, if appropriate, check for
+	 * both the old-style and new-style log file names.  The trick is
+	 * that all callers of this routine that are opening the log file
+	 * read-only want to use an old-style file name if they can't find
+	 * a match using a new-style name.  The only down-side is that some
+	 * callers may check for the old-style when they really don't need
+	 * to, but that shouldn't mess up anything, and we only check for
+	 * the old-style name when we've already failed to find a new-style
+	 * one.
+	 *
+	 * Create a new-style file name, and if we're not going to open the
+	 * file, return regardless.
+	 */
+	(void)snprintf(new, sizeof(new), LFNAME, filenumber);
+	if ((ret = __db_appname(env,
+	    DB_APP_LOG, new, NULL, namep)) != 0 || fhpp == NULL)
+		return (ret);
+
+	/* The application may have specified an absolute file mode. */
+	if (lp->filemode == 0)
+		mode = env->db_mode;
+	else {
+		LF_SET(DB_OSO_ABSMODE);
+		mode = lp->filemode;
+	}
+
+	/* Open the new-style file -- if we succeed, we're done. */
+	dblp->lf_timestamp = lp->timestamp;
+	if ((ret = __os_open(env, *namep, 0, flags, mode, fhpp)) == 0)
+		return (0);
+
+	/*
+	 * If the open failed for reason other than the file
+	 * not being there, complain loudly, the wrong user
+	 * probably started up the application.
+	 */
+	if (ret != ENOENT) {
+		__db_err(env, ret, DB_STR_A("2520",
+		    "%s: log file unreadable", "%s"), *namep);
+		return (__env_panic(env, ret));
+	}
+
+	/*
+	 * The open failed... if the DB_RDONLY flag isn't set, we're done,
+	 * the caller isn't interested in old-style files.
+	 */
+	if (!LF_ISSET(DB_OSO_RDONLY)) {
+		__db_err(env, ret, DB_STR_A("2521",
+		    "%s: log file open failed", "%s"), *namep);
+		return (__env_panic(env, ret));
+	}
+
+	/* Create an old-style file name. */
+	(void)snprintf(old, sizeof(old), LFNAME_V1, filenumber);
+	if ((ret = __db_appname(env,
+	    DB_APP_LOG, old, NULL, &oname)) != 0)
+		goto err;
+
+	/*
+	 * Open the old-style file -- if we succeed, we're done.  Free the
+	 * space allocated for the new-style name and return the old-style
+	 * name to the caller.
+	 */
+	if ((ret = __os_open(env, oname, 0, flags, mode, fhpp)) == 0) {
+		__os_free(env, *namep);
+		*namep = oname;
+		return (0);
+	}
+
+	/*
+	 * Couldn't find either style of name -- return the new-style name
+	 * for the caller's error message.  If it's an old-style name that's
+	 * actually missing we're going to confuse the user with the error
+	 * message, but that implies that not only were we looking for an
+	 * old-style name, but we expected it to exist and we weren't just
+	 * looking for any log file.  That's not a likely error.
+	 */
+err:	if (oname != NULL)
+		__os_free(env, oname);
+	return (ret);
+}
+
+/*
+ * __log_rep_put --
+ * Short-circuit way for replication clients to put records into the
+ * log. Replication clients' logs need to be laid out exactly as their masters'
+ * are, so we let replication take responsibility for when the log gets
+ * flushed, when log switches files, etc. This is just a thin PUBLIC wrapper
+ * for __log_putr with a slightly prettier interface.
+ *
+ * Note that the REP->mtx_clientdb should be held when this is called.
+ * Note that we acquire the log region mutex while holding mtx_clientdb.
+ *
+ * The record is copied into a temporary buffer (enlarged by the cipher's
+ * adj_size when encryption is on), passed through __log_encrypt_record and
+ * handed to __log_putr. The temporary buffer is freed after the log region
+ * mutex has been released.
+ *
+ * The err: label is reached on success as well as on failure, so the
+ * ready_lsn update, the DB_LOG_CHKPNT counter reset and the st_record
+ * increment are applied in either case. The value returned is the first
+ * error encountered, or the result of __log_putr.
+ *
+ * PUBLIC: int __log_rep_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t));
+ */
+int
+__log_rep_put(env, lsnp, rec, flags)
+ ENV *env;
+ DB_LSN *lsnp;
+ const DBT *rec;
+ u_int32_t flags;
+{
+ DBT *dbt, t;
+ DB_CIPHER *db_cipher;
+ DB_LOG *dblp;
+ HDR hdr;
+ LOG *lp;
+ int need_free, ret;
+
+ dblp = env->lg_handle;
+ lp = dblp->reginfo.primary;
+
+ LOG_SYSTEM_LOCK(env);
+ memset(&hdr, 0, sizeof(HDR));
+ t = *rec;
+ dbt = &t;
+ need_free = 0;
+ db_cipher = env->crypto_handle;
+ if (CRYPTO_ON(env))
+ t.size += db_cipher->adj_size(rec->size);
+ if ((ret = __os_calloc(env, 1, t.size, &t.data)) != 0)
+ goto err;
+ need_free = 1;
+ memcpy(t.data, rec->data, rec->size);
+
+ if ((ret = __log_encrypt_record(env, dbt, &hdr, rec->size)) != 0)
+ goto err;
+
+ DB_ASSERT(env, LOG_COMPARE(lsnp, &lp->lsn) == 0);
+ ret = __log_putr(dblp, lsnp, dbt, lp->lsn.offset - lp->len, &hdr);
+err:
+ /*
+ * !!! Assume caller holds REP->mtx_clientdb to modify ready_lsn.
+ */
+ lp->ready_lsn = lp->lsn;
+
+ if (LF_ISSET(DB_LOG_CHKPNT))
+ lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
+
+ /* Increment count of records added to the log. */
+ STAT(++lp->stat.st_record);
+ LOG_SYSTEM_UNLOCK(env);
+ if (need_free)
+ __os_free(env, t.data);
+ return (ret);
+}
+
+/*
+ * __log_encrypt_record --
+ *	Fill in the size fields of a log record header and, when encryption
+ *	is enabled for the environment, encrypt the record in place.
+ *
+ *	orig is the unpadded record length and is stored in the header only
+ *	when encrypting.  Returns 0, or the error from the cipher's encrypt
+ *	method.
+ */
+static int
+__log_encrypt_record(env, dbt, hdr, orig)
+	ENV *env;
+	DBT *dbt;
+	HDR *hdr;
+	u_int32_t orig;
+{
+	DB_CIPHER *cipher;
+
+	/* Plain-text logs only need the normal header size. */
+	if (!CRYPTO_ON(env)) {
+		hdr->size = HDR_NORMAL_SZ;
+		return (0);
+	}
+
+	cipher = env->crypto_handle;
+	hdr->size = HDR_CRYPTO_SZ;
+	hdr->orig_size = orig;
+	return (cipher->encrypt(env,
+	    cipher->data, hdr->iv, dbt->data, dbt->size));
+}
+/*
+ * __log_put_record_pp --
+ * DB_ENV->log_put_record pre/post processing.
+ *
+ * Checks that logging is configured, validates flags against the set
+ * passed to __db_fchk (DB_LOG_WRNOSYNC and DB_FLUSH may not be combined),
+ * refuses the call on replication clients with EINVAL, and then forwards
+ * the variable arguments to __log_put_record_int between ENV_ENTER and
+ * ENV_LEAVE, inside REPLICATION_WRAP.
+ *
+ * NOTE(review): the replication-client error text names DB_ENV->log_put
+ * rather than DB_ENV->log_put_record -- confirm whether that is intended.
+ *
+ * PUBLIC: int __log_put_record_pp __P((DB_ENV *, DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t,
+ * PUBLIC: DB_LOG_RECSPEC *, ...));
+ */
+#ifdef STDC_HEADERS
+int
+__log_put_record_pp(DB_ENV *dbenv, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp,
+ u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size,
+ DB_LOG_RECSPEC *spec, ...)
+#else
+int
+__log_put_record_pp(dbenv, dbp, txnp, ret_lsnp,
+ flags, rectype, has_data, size,
+ spec, va_alist)
+ DB_ENV *dbenv;
+ DB *dbp;
+ DB_TXN *txnp;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t rectype;
+ u_int32_t has_data;
+ u_int32_t size;
+ DB_LOG_RECSPEC *spec;
+ va_dcl
+#endif
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ va_list argp;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_REQUIRES_CONFIG(env,
+ env->lg_handle, "DB_ENV->log_put_record", DB_INIT_LOG);
+
+ /* Validate arguments: check for allowed flags. */
+ if ((ret = __db_fchk(env, "DB_ENV->log_put_record", flags,
+ DB_LOG_CHKPNT | DB_LOG_COMMIT |
+ DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_WRNOSYNC)) != 0)
+ return (ret);
+
+ /* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */
+ if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH))
+ return (__db_ferr(env, "DB_ENV->log_put_record", 1));
+
+ /* Replication clients should never write log records. */
+ if (IS_REP_CLIENT(env)) {
+ __db_errx(env, DB_STR("2522",
+ "DB_ENV->log_put is illegal on replication clients"));
+ return (EINVAL);
+ }
+
+ ENV_ENTER(env, ip);
+ va_start(argp, spec);
+ REPLICATION_WRAP(env, (__log_put_record_int(env, dbp,
+ txnp, ret_lsnp, flags, rectype, has_data, size, spec, argp)),
+ 0, ret);
+ va_end(argp);
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+
+/*
+ * __log_put_record --
+ *	Internal variadic entry point for writing a structured log record.
+ *
+ *	Collects the variable arguments described by spec into a va_list and
+ *	hands everything to __log_put_record_int, returning its result.  No
+ *	argument checking is done here.
+ *
+ * PUBLIC: int __log_put_record __P((ENV *, DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC:     u_int32_t, u_int32_t, u_int32_t, u_int32_t,
+ * PUBLIC:     DB_LOG_RECSPEC *, ...));
+ */
+#ifdef STDC_HEADERS
+int
+__log_put_record(ENV *env, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp,
+    u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size,
+    DB_LOG_RECSPEC *spec, ...)
+#else
+int
+__log_put_record(env, dbp, txnp, ret_lsnp,
+    flags, rectype, has_data, size, spec, va_alist)
+	ENV *env;
+	DB *dbp;
+	DB_TXN *txnp;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t rectype;
+	u_int32_t has_data;
+	u_int32_t size;
+	DB_LOG_RECSPEC *spec;
+	va_dcl
+#endif
+{
+	va_list argp;
+	int ret;
+
+	va_start(argp, spec);
+	ret = __log_put_record_int(env, dbp, txnp, ret_lsnp, flags,
+	    rectype, has_data, size, spec, argp);
+	va_end(argp);
+	return (ret);
+}
+
+/*
+ * __log_put_record_int --
+ *	Marshal a log record from a record specification and a va_list, and
+ *	either write it to the log or, for non-durable transactional
+ *	updates, queue it on the transaction.
+ *
+ *	The record starts with rectype, the transaction id and the previous
+ *	LSN, followed by one field per entry of spec (terminated by
+ *	LOGREC_Done); most field values are taken from argp.  size is the
+ *	marshalled size supplied by the caller; when encryption is on the
+ *	buffer is padded by the cipher's adj_size.
+ *
+ *	Returns 0 on success or the first error encountered.  An error
+ *	raised while marshalling releases the record buffer before
+ *	returning.
+ */
+#ifdef STDC_HEADERS
+static int
+__log_put_record_int(ENV *env, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp,
+    u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size,
+    DB_LOG_RECSPEC *spec, va_list argp)
+#else
+static int
+__log_put_record_int(env, dbp, txnp, ret_lsnp,
+    flags, rectype, has_data, size, spec, argp)
+	ENV *env;
+	DB *dbp;
+	DB_TXN *txnp;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t has_data;
+	u_int32_t size;
+	u_int32_t rectype;
+	DB_LOG_RECSPEC *spec;
+	va_list argp;
+#endif
+{
+	DBT *data, *dbt, *header, logrec;
+	DB_LOG_RECSPEC *sp;
+	DB_LSN *lsnp, lsn, null_lsn, *pagelsn, *rlsnp;
+	DB_TXNLOGREC *lr;
+	LOG *lp;
+	PAGE *pghdrstart;
+	u_int32_t hdrsize, op, zero, uinttmp, txn_num;
+	u_int npad;
+	u_int8_t *bp;
+	int is_durable, ret;
+	void *hdrstart;
+
+	COMPQUIET(lr, NULL);
+	COMPQUIET(hdrsize, 0);
+	COMPQUIET(op, 0);
+	COMPQUIET(hdrstart, NULL);
+	COMPQUIET(pghdrstart, NULL);
+	COMPQUIET(header, NULL);
+
+	/*
+	 * rlsnp will be stored into while holding the log system lock.
+	 * If this is a commit record then ret_lsnp will be the address of
+	 * the transaction detail visible_lsn field.  If not then this
+	 * may be the lsn of a page and we do not want to set it if
+	 * the log_put fails after writing the record (due to an I/O error).
+	 */
+	if (LF_ISSET(DB_LOG_COMMIT))
+		rlsnp = ret_lsnp;
+	else
+		rlsnp = &lsn;
+	npad = 0;
+	ret = 0;
+	data = NULL;
+
+	if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+	    (dbp != NULL && F_ISSET(dbp, DB_AM_NOT_DURABLE))) {
+		if (txnp == NULL)
+			return (0);
+		is_durable = 0;
+	} else
+		is_durable = 1;
+
+	if (txnp == NULL) {
+		txn_num = 0;
+		lsnp = &null_lsn;
+		null_lsn.file = null_lsn.offset = 0;
+	} else {
+		if (TAILQ_FIRST(&txnp->kids) != NULL &&
+		    (ret = __txn_activekids(env, rectype, txnp)) != 0)
+			return (ret);
+		/*
+		 * We need to assign begin_lsn while holding region mutex.
+		 * That assignment is done inside the DbEnv->log_put call,
+		 * so pass in the appropriate memory location to be filled
+		 * in by the log_put code.
+		 */
+		DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
+		txn_num = txnp->txnid;
+	}
+
+	if (dbp != NULL) {
+		DB_ASSERT(env, dbp->log_filename != NULL);
+		if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+		    (ret = __dbreg_lazy_id(dbp)) != 0)
+			return (ret);
+	}
+
+	logrec.size = size;
+
+	if (CRYPTO_ON(env)) {
+		npad = env->crypto_handle->adj_size(logrec.size);
+		logrec.size += npad;
+	}
+
+	/*
+	 * Nothing has been allocated up to this point, so the early returns
+	 * above need no cleanup.  From here on every failure must go through
+	 * the err label so the record buffer is released.
+	 */
+	if (is_durable || txnp == NULL) {
+		if ((ret =
+		    __os_malloc(env, logrec.size, &logrec.data)) != 0)
+			return (ret);
+	} else {
+		if ((ret = __os_malloc(env,
+		    logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+			return (ret);
+#ifdef DIAGNOSTIC
+		if ((ret =
+		    __os_malloc(env, logrec.size, &logrec.data)) != 0) {
+			__os_free(env, lr);
+			return (ret);
+		}
+#else
+		logrec.data = lr->data;
+#endif
+	}
+	if (npad > 0)
+		memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
+
+	bp = logrec.data;
+
+	LOGCOPY_32(env, bp, &rectype);
+	bp += sizeof(rectype);
+
+	LOGCOPY_32(env, bp, &txn_num);
+	bp += sizeof(txn_num);
+
+	LOGCOPY_FROMLSN(env, bp, lsnp);
+	bp += sizeof(DB_LSN);
+
+	zero = 0;
+	lp = env->lg_handle->reginfo.primary;
+	for (sp = spec; sp->type != LOGREC_Done; sp++) {
+		switch (sp->type) {
+		case LOGREC_DB:
+			/* This is not in the varargs. */
+			uinttmp = (u_int32_t)dbp->log_filename->id;
+			LOGCOPY_32(env, bp, &uinttmp);
+			bp += sizeof(uinttmp);
+			break;
+
+		case LOGREC_ARG:
+		case LOGREC_TIME:
+		case LOGREC_DBOP:
+			uinttmp = va_arg(argp, u_int32_t);
+			LOGCOPY_32(env, bp, &uinttmp);
+			bp += sizeof(uinttmp);
+			break;
+		case LOGREC_OP:
+			op = va_arg(argp, u_int32_t);
+			LOGCOPY_32(env, bp, &op);
+			bp += sizeof(uinttmp);
+			break;
+		case LOGREC_DBT:
+		case LOGREC_PGLIST:
+		case LOGREC_LOCKS:
+		case LOGREC_HDR:
+		case LOGREC_DATA:
+			dbt = va_arg(argp, DBT *);
+			if (dbt == NULL) {
+				LOGCOPY_32(env, bp, &zero);
+				bp += sizeof(u_int32_t);
+			} else {
+				LOGCOPY_32(env, bp, &dbt->size);
+				bp += sizeof(dbt->size);
+				memcpy(bp, dbt->data, dbt->size);
+			}
+			/* Process fields that need to be byte swapped. */
+			if (dbp != NULL && F_ISSET(dbp, DB_AM_SWAP)) {
+				if (sp->type == LOGREC_HDR &&
+				    dbt != NULL && has_data == 0)
+					__db_recordswap(op,
+					    dbt->size, bp, NULL, 0);
+				else if (sp->type == LOGREC_HDR) {
+					hdrstart = bp;
+					hdrsize = dbt == NULL ? 0 : dbt->size;
+				} else if (sp->type == LOGREC_DATA) {
+					__db_recordswap(op,
+					    hdrsize, hdrstart, bp, 0);
+					has_data = 0;
+				}
+			}
+			if (dbt != NULL)
+				bp += dbt->size;
+
+			break;
+		/*
+		 * Page header and data -- we assume that the header
+		 * is listed first and the data follows sometime later.
+		 * There should be only one header/data pair per record.
+		 */
+		case LOGREC_PGDBT:
+			header = va_arg(argp, DBT *);
+			if (header == NULL) {
+				LOGCOPY_32(env, bp, &zero);
+				bp += sizeof(u_int32_t);
+			} else {
+				LOGCOPY_32(env, bp, &header->size);
+				bp += sizeof(header->size);
+				pghdrstart = (PAGE *)bp;
+				memcpy(bp, header->data, header->size);
+				if (has_data == 0 &&
+				    F_ISSET(dbp, DB_AM_SWAP) &&
+				    (ret = __db_pageswap(
+				    env, dbp, pghdrstart, (size_t)header->size,
+				    NULL, 0)) != 0)
+					goto err;
+				bp += header->size;
+			}
+			break;
+
+		case LOGREC_PGDDBT:
+			data = va_arg(argp, DBT *);
+			if (data == NULL) {
+				zero = 0;
+				LOGCOPY_32(env, bp, &zero);
+				bp += sizeof(u_int32_t);
+			} else {
+				if (F_ISSET(dbp, DB_AM_SWAP) &&
+				    (ret = __db_pageswap(env, dbp, pghdrstart,
+				    (size_t)header->size, (DBT *)data, 0)) != 0)
+					goto err;
+				LOGCOPY_32(env, bp, &data->size);
+				bp += sizeof(data->size);
+				memcpy(bp, data->data, data->size);
+				if (F_ISSET(dbp, DB_AM_SWAP) &&
+				    F_ISSET(data, DB_DBT_APPMALLOC))
+					__os_free(env, data->data);
+				bp += data->size;
+			}
+			break;
+		case LOGREC_POINTER:
+			pagelsn = va_arg(argp, DB_LSN *);
+			if (pagelsn != NULL) {
+				if (txnp != NULL) {
+					if (LOG_COMPARE(pagelsn,
+					    &lp->lsn) >= 0 && (ret =
+					    __log_check_page_lsn(env,
+					    dbp, pagelsn)) != 0)
+						goto err;
+				}
+				LOGCOPY_FROMLSN(env, bp, pagelsn);
+			} else
+				memset(bp, 0, sizeof(*pagelsn));
+			bp += sizeof(*pagelsn);
+			break;
+
+		default:
+			/* Unknown field type: always fails when asserts are on. */
+			DB_ASSERT(env, sp->type != sp->type);
+		}
+	}
+
+	DB_ASSERT(env,
+	    (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+	if (is_durable || txnp == NULL) {
+		if ((ret = __log_put(env, rlsnp, (DBT *)&logrec,
+		    flags | DB_LOG_NOCOPY)) == 0) {
+			if (txnp != NULL)
+				*lsnp = *rlsnp;
+			*ret_lsnp = *rlsnp;
+		}
+	} else {
+		ret = 0;
+#ifdef DIAGNOSTIC
+		/*
+		 * Set the debug bit if we are going to log non-durable
+		 * transactions so they will be ignored by recovery.
+		 */
+		memcpy(lr->data, logrec.data, logrec.size);
+		rectype |= DB_debug_FLAG;
+		LOGCOPY_32(env, logrec.data, &rectype);
+
+		if (!IS_REP_CLIENT(env) && !lp->db_log_inmemory)
+			ret = __log_put(env,
+			    rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+#endif
+		/* The transaction now owns lr; it is not freed here. */
+		STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
+		F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
+		LSN_NOT_LOGGED(*ret_lsnp);
+	}
+
+#ifdef LOG_DIAGNOSTIC
+	if (ret != 0)
+		(void)__db_addrem_print(env,
+		    (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
+#endif
+
+#ifdef DIAGNOSTIC
+	__os_free(env, logrec.data);
+#else
+	if (is_durable || txnp == NULL)
+		__os_free(env, logrec.data);
+#endif
+	return (ret);
+
+	/*
+	 * Marshalling failed: the record was neither written nor queued on
+	 * the transaction, so every buffer allocated above is still ours to
+	 * release.
+	 */
+err:	if (is_durable || txnp == NULL)
+		__os_free(env, logrec.data);
+	else {
+#ifdef DIAGNOSTIC
+		__os_free(env, logrec.data);
+#endif
+		__os_free(env, lr);
+	}
+	return (ret);
+}
diff --git a/src/log/log_stat.c b/src/log/log_stat.c
new file mode 100644
index 00000000..37b74c74
--- /dev/null
+++ b/src/log/log_stat.c
@@ -0,0 +1,336 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+
+#ifdef HAVE_STATISTICS
+static int __log_print_all __P((ENV *, u_int32_t));
+static int __log_print_stats __P((ENV *, u_int32_t));
+static int __log_stat __P((ENV *, DB_LOG_STAT **, u_int32_t));
+
+/*
+ * __log_stat_pp --
+ * DB_ENV->log_stat pre/post processing.
+ *
+ * Requires a configured log subsystem, checks flags with __db_fchk using
+ * DB_STAT_CLEAR as the permitted set, and then runs __log_stat between
+ * ENV_ENTER and ENV_LEAVE, inside REPLICATION_WRAP. Returns the flag-check
+ * error or whatever __log_stat returns.
+ *
+ * PUBLIC: int __log_stat_pp __P((DB_ENV *, DB_LOG_STAT **, u_int32_t));
+ */
+int
+__log_stat_pp(dbenv, statp, flags)
+ DB_ENV *dbenv;
+ DB_LOG_STAT **statp;
+ u_int32_t flags;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_REQUIRES_CONFIG(env,
+ env->lg_handle, "DB_ENV->log_stat", DB_INIT_LOG);
+
+ if ((ret = __db_fchk(env,
+ "DB_ENV->log_stat", flags, DB_STAT_CLEAR)) != 0)
+ return (ret);
+
+ ENV_ENTER(env, ip);
+ REPLICATION_WRAP(env, (__log_stat(env, statp, flags)), 0, ret);
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+
+/*
+ * __log_stat --
+ *	DB_ENV->log_stat.
+ *
+ *	Allocate a DB_LOG_STAT with __os_umalloc, fill it from the log
+ *	region while holding the log system lock, and hand it back through
+ *	statp.  With DB_STAT_CLEAR the region's counters are zeroed after
+ *	they have been copied.  *statp is NULL on failure.
+ */
+static int
+__log_stat(env, statp, flags)
+	ENV *env;
+	DB_LOG_STAT **statp;
+	u_int32_t flags;
+{
+	DB_LOG *dblp;
+	DB_LOG_STAT *snap;
+	LOG *lp;
+	int ret;
+
+	*statp = NULL;
+
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	ret = __os_umalloc(env, sizeof(*snap), &snap);
+	if (ret != 0)
+		return (ret);
+
+	LOG_SYSTEM_LOCK(env);
+
+	/* Take the counters first; clearing must come after the copy. */
+	*snap = lp->stat;
+	if (LF_ISSET(DB_STAT_CLEAR))
+		memset(&lp->stat, 0, sizeof(lp->stat));
+
+	/* Configuration values that are not kept in lp->stat. */
+	snap->st_magic = lp->persist.magic;
+	snap->st_version = lp->persist.version;
+	snap->st_mode = lp->filemode;
+	snap->st_lg_bsize = lp->buffer_size;
+	snap->st_lg_size = lp->log_nsize;
+
+	/* Region mutex contention, optionally reset. */
+	__mutex_set_wait_info(env, lp->mtx_region,
+	    &snap->st_region_wait, &snap->st_region_nowait);
+	if (LF_ISSET(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM) == DB_STAT_CLEAR)
+		__mutex_clear(env, lp->mtx_region);
+	snap->st_regsize = dblp->reginfo.rp->size;
+
+	/* Where the log currently ends, in memory and on disk. */
+	snap->st_cur_file = lp->lsn.file;
+	snap->st_cur_offset = lp->lsn.offset;
+	snap->st_disk_file = lp->s_lsn.file;
+	snap->st_disk_offset = lp->s_lsn.offset;
+
+	LOG_SYSTEM_UNLOCK(env);
+
+	*statp = snap;
+	return (0);
+}
+
+/*
+ * __log_stat_print_pp --
+ * DB_ENV->log_stat_print pre/post processing.
+ *
+ * Requires a configured log subsystem, checks flags with __db_fchk using
+ * DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR as the permitted set, and
+ * then runs __log_stat_print between ENV_ENTER and ENV_LEAVE, inside
+ * REPLICATION_WRAP. Returns the flag-check error or whatever
+ * __log_stat_print returns.
+ *
+ * PUBLIC: int __log_stat_print_pp __P((DB_ENV *, u_int32_t));
+ */
+int
+__log_stat_print_pp(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_REQUIRES_CONFIG(env,
+ env->lg_handle, "DB_ENV->log_stat_print", DB_INIT_LOG);
+
+ if ((ret = __db_fchk(env, "DB_ENV->log_stat_print",
+ flags, DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR)) != 0)
+ return (ret);
+
+ ENV_ENTER(env, ip);
+ REPLICATION_WRAP(env, (__log_stat_print(env, flags)), 0, ret);
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+
+/*
+ * __log_stat_print --
+ *	DB_ENV->log_stat_print method.
+ *
+ *	DB_STAT_CLEAR and DB_STAT_SUBSYSTEM are ignored when deciding what
+ *	to print, but the helpers still receive the caller's original flags.
+ *	With nothing else set only the default statistics are printed; with
+ *	DB_STAT_ALL the default statistics are followed by the debugging
+ *	dump; any other combination prints nothing.
+ *
+ * PUBLIC: int __log_stat_print __P((ENV *, u_int32_t));
+ */
+int
+__log_stat_print(env, flags)
+	ENV *env;
+	u_int32_t flags;
+{
+	u_int32_t orig_flags;
+	int ret;
+
+	orig_flags = flags;
+	LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM);
+
+	/* No selector bits: default statistics only. */
+	if (flags == 0)
+		return (__log_print_stats(env, orig_flags));
+
+	/* Some selector other than DB_STAT_ALL: nothing to do here. */
+	if (!LF_ISSET(DB_STAT_ALL))
+		return (0);
+
+	/* DB_STAT_ALL: default statistics first, then everything else. */
+	if ((ret = __log_print_stats(env, orig_flags)) != 0)
+		return (ret);
+	return (__log_print_all(env, orig_flags));
+}
+
+/*
+ * __log_print_stats --
+ * Display default log region statistics.
+ *
+ * Takes a snapshot with __log_stat (flags are passed through, so
+ * DB_STAT_CLEAR is acted on there), prints it field by field, and releases
+ * the snapshot with __os_ufree. A heading line is printed only when
+ * DB_STAT_ALL is set. Returns the __log_stat error, otherwise 0.
+ */
+static int
+__log_print_stats(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ DB_LOG_STAT *sp;
+ int ret;
+
+ if ((ret = __log_stat(env, &sp, flags)) != 0)
+ return (ret);
+
+ if (LF_ISSET(DB_STAT_ALL))
+ __db_msg(env, "Default logging region information:");
+ STAT_HEX("Log magic number", sp->st_magic);
+ STAT_ULONG("Log version number", sp->st_version);
+ __db_dlbytes(env, "Log record cache size",
+ (u_long)0, (u_long)0, (u_long)sp->st_lg_bsize);
+ __db_msg(env, "%#o\tLog file mode", sp->st_mode);
+ if (sp->st_lg_size % MEGABYTE == 0)
+ __db_msg(env, "%luMb\tCurrent log file size",
+ (u_long)sp->st_lg_size / MEGABYTE);
+ else if (sp->st_lg_size % 1024 == 0)
+ __db_msg(env, "%luKb\tCurrent log file size",
+ (u_long)sp->st_lg_size / 1024);
+ else
+ __db_msg(env, "%lu\tCurrent log file size",
+ (u_long)sp->st_lg_size);
+ __db_dl(env, "Initial fileid allocation", (u_long)sp->st_fileid_init);
+ __db_dl(env, "Current fileids in use", (u_long)sp->st_nfileid);
+ __db_dl(env, "Maximum fileids used", (u_long)sp->st_maxnfileid);
+ __db_dl(env, "Records entered into the log", (u_long)sp->st_record);
+ __db_dlbytes(env, "Log bytes written",
+ (u_long)0, (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes);
+ __db_dlbytes(env, "Log bytes written since last checkpoint",
+ (u_long)0, (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes);
+ __db_dl(env, "Total log file I/O writes", (u_long)sp->st_wcount);
+ __db_dl(env, "Total log file I/O writes due to overflow",
+ (u_long)sp->st_wcount_fill);
+ __db_dl(env, "Total log file flushes", (u_long)sp->st_scount);
+ __db_dl(env, "Total log file I/O reads", (u_long)sp->st_rcount);
+ STAT_ULONG("Current log file number", sp->st_cur_file);
+ STAT_ULONG("Current log file offset", sp->st_cur_offset);
+ STAT_ULONG("On-disk log file number", sp->st_disk_file);
+ STAT_ULONG("On-disk log file offset", sp->st_disk_offset);
+
+ __db_dl(env,
+ "Maximum commits in a log flush", (u_long)sp->st_maxcommitperflush);
+ __db_dl(env,
+ "Minimum commits in a log flush", (u_long)sp->st_mincommitperflush);
+
+ __db_dlbytes(env, "Region size",
+ (u_long)0, (u_long)0, (u_long)sp->st_regsize);
+ __db_dl_pct(env,
+ "The number of region locks that required waiting",
+ (u_long)sp->st_region_wait, DB_PCT(sp->st_region_wait,
+ sp->st_region_wait + sp->st_region_nowait), NULL);
+
+ __os_ufree(env, sp);
+
+ return (0);
+}
+
+/*
+ * __log_print_all --
+ *	Display debugging log region statistics.
+ *
+ *	Prints the region description, the per-process DB_LOG handle state
+ *	and the shared LOG region state while holding the log system lock.
+ *	flags is passed through to the region, mutex and file-handle
+ *	printers.  Always returns 0.
+ */
+static int
+__log_print_all(env, flags)
+	ENV *env;
+	u_int32_t flags;
+{
+	/* One entry per DB_LOG flag, so each set flag is named once. */
+	static const FN fn[] = {
+		{ DBLOG_AUTOREMOVE,	"DBLOG_AUTOREMOVE"},
+		{ DBLOG_DIRECT,		"DBLOG_DIRECT"},
+		{ DBLOG_DSYNC,		"DBLOG_DSYNC"},
+		{ DBLOG_FORCE_OPEN,	"DBLOG_FORCE_OPEN"},
+		{ DBLOG_INMEMORY,	"DBLOG_INMEMORY"},
+		{ DBLOG_OPENFILES,	"DBLOG_OPENFILES"},
+		{ DBLOG_RECOVER,	"DBLOG_RECOVER"},
+		{ DBLOG_ZERO,		"DBLOG_ZERO"},
+		{ 0,			NULL }
+	};
+	DB_LOG *dblp;
+	LOG *lp;
+
+	dblp = env->lg_handle;
+	lp = (LOG *)dblp->reginfo.primary;
+
+	LOG_SYSTEM_LOCK(env);
+
+	__db_print_reginfo(env, &dblp->reginfo, "Log", flags);
+
+	__db_msg(env, "%s", DB_GLOBAL(db_line));
+	__db_msg(env, "DB_LOG handle information:");
+	__mutex_print_debug_single(
+	    env, "DB_LOG handle mutex", dblp->mtx_dbreg, flags);
+	STAT_ULONG("Log file name", dblp->lfname);
+	__db_print_fh(env, "Log file handle", dblp->lfhp, flags);
+	__db_prflags(env, NULL, dblp->flags, fn, NULL, "\tFlags");
+
+	__db_msg(env, "%s", DB_GLOBAL(db_line));
+	__db_msg(env, "LOG handle information:");
+	__mutex_print_debug_single(
+	    env, "LOG region mutex", lp->mtx_region, flags);
+	__mutex_print_debug_single(
+	    env, "File name list mutex", lp->mtx_filelist, flags);
+
+	STAT_HEX("persist.magic", lp->persist.magic);
+	STAT_ULONG("persist.version", lp->persist.version);
+	__db_dlbytes(env,
+	    "persist.log_size", (u_long)0, (u_long)0, lp->persist.log_size);
+	STAT_FMT("log file permissions mode", "%#lo", u_long, lp->filemode);
+	STAT_LSN("current file offset LSN", &lp->lsn);
+	/* f_lsn, not lsn, is the LSN owning the first byte of the buffer. */
+	STAT_LSN("first buffer byte LSN", &lp->f_lsn);
+	STAT_ULONG("current buffer offset", lp->b_off);
+	STAT_ULONG("current file write offset", lp->w_off);
+	STAT_ULONG("length of last record", lp->len);
+	STAT_LONG("log flush in progress", lp->in_flush);
+	__mutex_print_debug_single(
+	    env, "Log flush mutex", lp->mtx_flush, flags);
+
+	STAT_LSN("last sync LSN", &lp->s_lsn);
+
+	/*
+	 * Don't display the replication fields here, they're displayed as part
+	 * of the replication statistics.
+	 */
+
+	STAT_LSN("cached checkpoint LSN", &lp->cached_ckp_lsn);
+
+	__db_dlbytes(env,
+	    "log buffer size", (u_long)0, (u_long)0, lp->buffer_size);
+	__db_dlbytes(env,
+	    "log file size", (u_long)0, (u_long)0, lp->log_size);
+	__db_dlbytes(env,
+	    "next log file size", (u_long)0, (u_long)0, lp->log_nsize);
+
+	STAT_ULONG("transactions waiting to commit", lp->ncommit);
+	STAT_LSN("LSN of first commit", &lp->t_lsn);
+
+	LOG_SYSTEM_UNLOCK(env);
+
+	return (0);
+}
+
+#else /* !HAVE_STATISTICS */
+
+/*
+ * __log_stat_pp --
+ *	DB_ENV->log_stat stand-in used when the library is built without
+ *	HAVE_STATISTICS; returns the result of __db_stat_not_built.
+ */
+int
+__log_stat_pp(dbenv, statp, flags)
+	DB_ENV *dbenv;
+	DB_LOG_STAT **statp;
+	u_int32_t flags;
+{
+	ENV *env;
+
+	env = dbenv->env;
+
+	/* Neither argument is used; keep the compiler quiet about it. */
+	COMPQUIET(flags, 0);
+	COMPQUIET(statp, NULL);
+
+	return (__db_stat_not_built(env));
+}
+
+/*
+ * __log_stat_print_pp --
+ *	DB_ENV->log_stat_print stand-in used when the library is built
+ *	without HAVE_STATISTICS; returns the result of __db_stat_not_built.
+ */
+int
+__log_stat_print_pp(dbenv, flags)
+	DB_ENV *dbenv;
+	u_int32_t flags;
+{
+	ENV *env;
+
+	env = dbenv->env;
+
+	/* flags is unused; keep the compiler quiet about it. */
+	COMPQUIET(flags, 0);
+
+	return (__db_stat_not_built(env));
+}
+#endif
diff --git a/src/log/log_verify.c b/src/log/log_verify.c
new file mode 100644
index 00000000..e7f8f688
--- /dev/null
+++ b/src/log/log_verify.c
@@ -0,0 +1,437 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/fop.h"
+#include "dbinc/hash.h"
+#include "dbinc/qam.h"
+#include "dbinc/txn.h"
+
+#include "dbinc/log_verify.h"
+
+#define FIRST_OFFSET(env) \
+ (sizeof(LOGP) + (CRYPTO_ON(env) ? HDR_CRYPTO_SZ : HDR_NORMAL_SZ))
+
+static int __env_init_verify __P((ENV *, u_int32_t, DB_DISTAB *));
+
+/*
+ * PUBLIC: int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *));
+ */
+/*
+ * __log_verify_pp --
+ *	Argument checking wrapper around __log_verify.
+ *
+ *	Returns EINVAL when the configuration mixes an LSN range with a time
+ *	range, or when the temporary environment home is the same string as
+ *	the home of the environment being verified.  Otherwise returns
+ *	whatever __log_verify returns.
+ */
+int
+__log_verify_pp(dbenv, lvconfig)
+	DB_ENV *dbenv;
+	const DB_LOG_VERIFY_CONFIG *lvconfig;
+{
+	DB_THREAD_INFO *ip;
+	const char *home;
+	int by_lsn, by_time, ret;
+
+	/* A range is given "by LSN" or "by time" if either end is set. */
+	by_lsn = !IS_ZERO_LSN(lvconfig->start_lsn) ||
+	    !IS_ZERO_LSN(lvconfig->end_lsn);
+	by_time = lvconfig->start_time != 0 || lvconfig->end_time != 0;
+
+	/* Any combination of an LSN bound and a time bound is rejected. */
+	if (by_lsn && by_time) {
+		__db_errx(dbenv->env, DB_STR("2501",
+	    "Set either an lsn range or a time range to verify logs "
+	    "in the range, don't mix time and lsn."));
+		return (EINVAL);
+	}
+
+	/* The scratch environment may not share the verified one's home. */
+	home = dbenv->env->db_home;
+	if (home != NULL && lvconfig->temp_envhome != NULL &&
+	    strcmp(home, lvconfig->temp_envhome) == 0) {
+		__db_errx(dbenv->env,
+		    "Environment home for log verification internal use "
+		    "overlaps with that of the environment to verify.");
+		return (EINVAL);
+	}
+
+	ENV_ENTER(dbenv->env, ip);
+	ret = __log_verify(dbenv, lvconfig, ip);
+	ENV_LEAVE(dbenv->env, ip);
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *,
+ * PUBLIC: DB_THREAD_INFO *));
+ */
+/*
+ * __log_verify --
+ *	Verify the log records selected by lvconfig by dispatching each one
+ *	to the log-verify handlers.
+ *
+ *	The log is walked twice.  The first pass runs with
+ *	DB_LOG_VERIFY_FORWARD set on the verify handle and ignores errors
+ *	returned by __db_dispatch.  The second pass runs with the flag
+ *	cleared; a dispatch error then either ends verification or, when
+ *	continue_after_fail is configured, sets DB_LOG_VERIFY_ERR on the
+ *	handle and the scan carries on.
+ *
+ *	Returns 0 when verification succeeded, DB_LOG_VERIFY_BAD when the
+ *	handle recorded DB_LOG_VERIFY_ERR or DB_LOG_VERIFY_INTERR, or the
+ *	error of the setup/dispatch step that failed.  Errors from the log
+ *	cursor's get are reported and end the scan, but are not returned.
+ */
+int
+__log_verify(dbenv, lvconfig, ip)
+	DB_ENV *dbenv;
+	const DB_LOG_VERIFY_CONFIG *lvconfig;
+	DB_THREAD_INFO *ip;
+{
+	u_int32_t logcflag, max_fileno;
+	DB_LOGC *logc;
+	ENV *env;
+	DBT data;
+	DB_DISTAB dtab;
+	DB_LSN key, start, start2, stop, stop2, verslsn;
+	u_int32_t newversion, version;
+	int cmp, fwdscroll, goprev, ret, tret;
+	time_t starttime, endtime;
+	const char *okmsg;
+	DB_LOG_VRFY_INFO *logvrfy_hdl;
+
+	okmsg = NULL;
+	fwdscroll = 1;
+	max_fileno = (u_int32_t)-1;
+	goprev = 0;
+	env = dbenv->env;
+	logc = NULL;
+	/*
+	 * The cleanup code at "err" also runs when the verify handle could
+	 * not be created, so the pointer must have a defined value before
+	 * the first "goto err".
+	 */
+	logvrfy_hdl = NULL;
+	memset(&dtab, 0, sizeof(dtab));
+	memset(&data, 0, sizeof(data));
+	version = newversion = 0;
+	ZERO_LSN(verslsn);
+	memset(&start, 0, sizeof(DB_LSN));
+	memset(&start2, 0, sizeof(DB_LSN));
+	memset(&stop, 0, sizeof(DB_LSN));
+	memset(&stop2, 0, sizeof(DB_LSN));
+	memset(&key, 0, sizeof(DB_LSN));
+	memset(&verslsn, 0, sizeof(DB_LSN));
+
+	start = lvconfig->start_lsn;
+	stop = lvconfig->end_lsn;
+	starttime = lvconfig->start_time;
+	endtime = lvconfig->end_time;
+
+	if ((ret = __create_log_vrfy_info(lvconfig, &logvrfy_hdl, ip)) != 0)
+		goto err;
+	logvrfy_hdl->lv_config = lvconfig;
+	if (lvconfig->continue_after_fail)
+		F_SET(logvrfy_hdl, DB_LOG_VERIFY_CAF);
+	if (lvconfig->verbose)
+		F_SET(logvrfy_hdl, DB_LOG_VERIFY_VERBOSE);
+
+	/* Allocate a log cursor. */
+	if ((ret = __log_cursor(dbenv->env, &logc)) != 0) {
+		__db_err(dbenv->env, ret, "DB_ENV->log_cursor");
+		goto err;
+	}
+	/* Ignore failed chksum and go on with next one. */
+	F_SET(logc->env->lg_handle, DBLOG_VERIFYING);
+
+	/* Only scan the range that we want to verify. */
+	if (fwdscroll) {
+		if (IS_ZERO_LSN(stop)) {
+			logcflag = DB_LAST;
+			key.file = key.offset = 0;
+		} else {
+			/*
+			 * NOTE(review): goprev is 0 here, so after the first
+			 * record the DB_SET case below switches to DB_NEXT,
+			 * whereas the DB_LAST case steps with DB_PREV.
+			 * Confirm that scanning away from "start" is the
+			 * intended direction when an end LSN is configured.
+			 */
+			key = stop;
+			logcflag = DB_SET;
+		}
+		logvrfy_hdl->flags |= DB_LOG_VERIFY_FORWARD;
+		goto startscroll;
+	}
+
+vrfyscroll:
+
+	/*
+	 * Initialize version to 0 so that we get the
+	 * correct version right away.
+	 */
+	version = 0;
+	ZERO_LSN(verslsn);
+
+	/*
+	 * In the log verification config struct, start_lsn and end_lsn have
+	 * higher priority than start_time and end_time, and you can specify
+	 * either lsn or time to start/stop verification.
+	 */
+	if (starttime != 0 || endtime != 0) {
+		if ((ret = __find_lsnrg_by_timerg(logvrfy_hdl,
+		    starttime, endtime, &start2, &stop2)) != 0)
+			goto err;
+		/* The LSN range that was found is stored back in the config. */
+		((DB_LOG_VERIFY_CONFIG *)lvconfig)->start_lsn = start = start2;
+		((DB_LOG_VERIFY_CONFIG *)lvconfig)->end_lsn = stop = stop2;
+	}
+
+	if (IS_ZERO_LSN(start)) {
+		logcflag = DB_FIRST;
+		key.file = key.offset = 0;
+	} else {
+		key = start;
+		logcflag = DB_SET;
+		F_SET(logvrfy_hdl, DB_LOG_VERIFY_PARTIAL);
+	}
+	goprev = 0;
+
+	/*
+	 * So far we only support verifying a specific db file. The config's
+	 * dbfile must be prefixed with the data directory if it's not in
+	 * environment home directory.
+	 */
+	if (lvconfig->dbfile != NULL) {
+		F_SET(logvrfy_hdl,
+		    DB_LOG_VERIFY_DBFILE | DB_LOG_VERIFY_PARTIAL);
+		if ((ret = __set_logvrfy_dbfuid(logvrfy_hdl)) != 0)
+			goto err;
+	}
+
+startscroll:
+
+	memset(&data, 0, sizeof(data));
+
+	for (;;) {
+
+		/*
+		 * We may have reached beyond the range we're verifying.
+		 */
+		if (!fwdscroll && !IS_ZERO_LSN(stop)) {
+			cmp = LOG_COMPARE(&key, &stop);
+			if (cmp > 0)
+				break;
+		}
+		if (fwdscroll && !IS_ZERO_LSN(start)) {
+			cmp = LOG_COMPARE(&key, &start);
+			if (cmp < 0)
+				break;
+		}
+
+		ret = __logc_get(logc, &key, &data, logcflag);
+		if (ret != 0) {
+			if (ret == DB_NOTFOUND) {
+				/* We may not start from the first log file. */
+				if (logcflag == DB_PREV && key.file > 1)
+					F_SET(logvrfy_hdl,
+					    DB_LOG_VERIFY_PARTIAL);
+				break;
+			}
+			__db_err(dbenv->env, ret, "DB_LOGC->get");
+			/*
+			 * When we go beyond the valid lsn range, we may get
+			 * error values other than DB_NOTFOUND.
+			 */
+			goto out;
+		}
+
+		/* After the positioning call, switch to stepping. */
+		if (logcflag == DB_SET) {
+			if (goprev)
+				logcflag = DB_PREV;
+			else
+				logcflag = DB_NEXT;
+		} else if (logcflag == DB_LAST) {
+			logcflag = DB_PREV;
+			max_fileno = key.file;
+		} else if (logcflag == DB_FIRST)
+			logcflag = DB_NEXT;
+
+		if (key.file != verslsn.file) {
+			/*
+			 * If our log file changed, we need to see if the
+			 * version of the log file changed as well.
+			 * If it changed, reset the dispatch table.
+			 */
+			if ((ret = __logc_version(logc, &newversion)) != 0) {
+				__db_err(dbenv->env, ret, "DB_LOGC->version");
+				goto err;
+			}
+			if (version != newversion) {
+				version = newversion;
+				if (!IS_LOG_VRFY_SUPPORTED(version)) {
+					__db_msg(dbenv->env, DB_STR_A("2502",
+	    "[%lu][%lu] Unsupported version of log file, "
+	    "log file number: %u, log file version: %u, "
+	    "supported log version: %u.",
+					    "%lu %lu %u %u %u"),
+					    (u_long)key.file,
+					    (u_long)key.offset,
+					    key.file, version, DB_LOGVERSION);
+					/*
+					 * Skip the whole file: reposition on
+					 * the first record of the neighbouring
+					 * file in the direction of travel.
+					 */
+					if (logcflag == DB_NEXT) {
+						key.file += 1;
+						if (key.file > max_fileno)
+							break;
+					/*
+					 * Txns don't span log versions, no need to
+					 * set DB_LOG_VERIFY_PARTIAL here.
+					 */
+					} else {
+						goprev = 1;
+						key.file -= 1;
+						if (key.file == 0)
+							break;
+					}
+					key.offset = FIRST_OFFSET(env);
+					logcflag = DB_SET;
+					continue;
+				}
+				if ((ret = __env_init_verify(env, version,
+				    &dtab)) != 0) {
+					__db_err(dbenv->env, ret,
+					    DB_STR("2503",
+					    "callback: initialization"));
+					goto err;
+				}
+			}
+			verslsn = key;
+		}
+
+		ret = __db_dispatch(dbenv->env, &dtab, &data, &key,
+		    DB_TXN_LOG_VERIFY, logvrfy_hdl);
+
+		/* Dispatch errors only count during the second pass. */
+		if (!fwdscroll && ret != 0) {
+			if (!F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_CAF)) {
+				__db_err(dbenv->env, ret,
+				    "[%lu][%lu] __db_dispatch",
+				    (u_long)key.file, (u_long)key.offset);
+				goto err;
+			} else
+				F_SET(logvrfy_hdl, DB_LOG_VERIFY_ERR);
+		}
+	}
+
+	/* First pass finished: run the loop again as the verifying pass. */
+	if (fwdscroll) {
+		fwdscroll = 0;
+		F_CLR(logvrfy_hdl, DB_LOG_VERIFY_FORWARD);
+		goto vrfyscroll;
+	}
+out:
+	/*
+	 * When we arrive here ret can be 0 or errors returned by DB_LOGC->get,
+	 * all which we have already handled. So we clear ret.
+	 */
+	ret = 0;
+
+	/* If continuing after fail, we can complete the entire log. */
+	if (F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_ERR) ||
+	    F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_INTERR))
+		ret = DB_LOG_VERIFY_BAD;
+	/*
+	 * This function can be called when the environment is alive, so
+	 * there can be active transactions.
+	 */
+	__db_log_verify_global_report(logvrfy_hdl);
+	if (ret == DB_LOG_VERIFY_BAD)
+		okmsg = DB_STR_P("FAILED");
+	else {
+		DB_ASSERT(dbenv->env, ret == 0);
+		okmsg = DB_STR_P("SUCCEEDED");
+	}
+
+	__db_msg(dbenv->env, DB_STR_A("2504",
+	    "Log verification ended and %s.", "%s"), okmsg);
+
+err:
+	if (logc != NULL)
+		(void)__logc_close(logc);
+	/* Only tear down a handle that was actually created. */
+	if (logvrfy_hdl != NULL &&
+	    (tret = __destroy_log_vrfy_info(logvrfy_hdl)) != 0 && ret == 0)
+		ret = tret;
+	if (dtab.int_dispatch)
+		__os_free(dbenv->env, dtab.int_dispatch);
+	if (dtab.ext_dispatch)
+		__os_free(dbenv->env, dtab.ext_dispatch);
+
+	return (ret);
+}
+
+/*
+ * __env_init_verify --
+ *	Fill the dispatch table with the log-verify handlers of every
+ *	subsystem, then check that the log version is one we can verify.
+ *
+ *	Returns the error of the first registration that fails, EINVAL (after
+ *	reporting it) for a version other than DB_LOGVERSION, and 0 otherwise.
+ */
+static int
+__env_init_verify(env, version, dtabp)
+	ENV *env;
+	u_int32_t version;
+	DB_DISTAB *dtabp;
+{
+	int ret;
+
+	/*
+	 * Prime the table with the current verify functions.  Entries for
+	 * older log versions, if ever supported, would be overwritten after
+	 * this point.
+	 */
+	if ((ret = __bam_init_verify(env, dtabp)) != 0)
+		return (ret);
+	if ((ret = __crdel_init_verify(env, dtabp)) != 0)
+		return (ret);
+	if ((ret = __db_init_verify(env, dtabp)) != 0)
+		return (ret);
+	if ((ret = __dbreg_init_verify(env, dtabp)) != 0)
+		return (ret);
+	if ((ret = __fop_init_verify(env, dtabp)) != 0)
+		return (ret);
+#ifdef HAVE_HASH
+	if ((ret = __ham_init_verify(env, dtabp)) != 0)
+		return (ret);
+#endif
+#ifdef HAVE_HEAP
+	if ((ret = __heap_init_verify(env, dtabp)) != 0)
+		return (ret);
+#endif
+#ifdef HAVE_QUEUE
+	if ((ret = __qam_init_verify(env, dtabp)) != 0)
+		return (ret);
+#endif
+	if ((ret = __txn_init_verify(env, dtabp)) != 0)
+		return (ret);
+
+	/* Only the current log version is accepted. */
+	if (version == DB_LOGVERSION)
+		return (0);
+
+	__db_errx(env, DB_STR_A("2505", "Not supported version %lu",
+	    "%lu"), (u_long)version);
+	return (EINVAL);
+}
+
+/*
+ * __log_verify_wrap --
+ * Wrapper function for APIs of other languages, like java/c# and
+ * script languages. It's much easier to implement the swig layer
+ * when we split up the C structure.
+ *
+ * PUBLIC: int __log_verify_wrap __P((ENV *, const char *, u_int32_t,
+ * PUBLIC: const char *, const char *, time_t, time_t, u_int32_t,
+ * PUBLIC: u_int32_t, u_int32_t, u_int32_t, int, int));
+ */
+int
+__log_verify_wrap(env, envhome, cachesize, dbfile, dbname,
+    stime, etime, stfile, stoffset, efile, eoffset, caf, verbose)
+	ENV *env;
+	const char *envhome, *dbfile, *dbname;
+	time_t stime, etime;
+	u_int32_t cachesize, stfile, stoffset, efile, eoffset;
+	int caf, verbose;
+{
+	DB_LOG_VERIFY_CONFIG cfg;
+	int ret;
+
+	/* Start from an all-zero config, then copy each scalar argument in. */
+	memset(&cfg, 0, sizeof(cfg));
+
+	cfg.temp_envhome = envhome;
+	cfg.cachesize = cachesize;
+	cfg.dbfile = dbfile;
+	cfg.dbname = dbname;
+
+	/* The LSN bounds arrive split into their file/offset halves. */
+	cfg.start_lsn.file = stfile;
+	cfg.start_lsn.offset = stoffset;
+	cfg.end_lsn.file = efile;
+	cfg.end_lsn.offset = eoffset;
+	cfg.start_time = stime;
+	cfg.end_time = etime;
+
+	cfg.verbose = verbose;
+	cfg.continue_after_fail = caf;
+
+	ret = __log_verify_pp(env->dbenv, &cfg);
+	return (ret);
+}
diff --git a/src/log/log_verify_auto.c b/src/log/log_verify_auto.c
new file mode 100644
index 00000000..08bc5d64
--- /dev/null
+++ b/src/log/log_verify_auto.c
@@ -0,0 +1,318 @@
+/* Do not edit: automatically built by gen_rec.awk. */
+
+#include "db_config.h"
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/btree.h"
+#include "dbinc/txn.h"
+#include "dbinc/hash.h"
+#include "dbinc/heap.h"
+#include "dbinc/qam.h"
+#include "dbinc/fop.h"
+
+/*
+ * PUBLIC: int __crdel_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__crdel_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __crdel_metasub_verify, DB___crdel_metasub)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __crdel_inmem_create_verify, DB___crdel_inmem_create)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __crdel_inmem_rename_verify, DB___crdel_inmem_rename)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __crdel_inmem_remove_verify, DB___crdel_inmem_remove)) != 0)
+ return (ret);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __db_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__db_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_addrem_verify, DB___db_addrem)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_big_verify, DB___db_big)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_ovref_verify, DB___db_ovref)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_debug_verify, DB___db_debug)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_noop_verify, DB___db_noop)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_pg_alloc_verify, DB___db_pg_alloc)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_pg_free_verify, DB___db_pg_free)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_cksum_verify, DB___db_cksum)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_pg_freedata_verify, DB___db_pg_freedata)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_pg_init_verify, DB___db_pg_init)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_pg_trunc_verify, DB___db_pg_trunc)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_realloc_verify, DB___db_realloc)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_relink_verify, DB___db_relink)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_merge_verify, DB___db_merge)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __db_pgno_verify, DB___db_pgno)) != 0)
+ return (ret);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __dbreg_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__dbreg_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __dbreg_register_verify, DB___dbreg_register)) != 0)
+ return (ret);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __bam_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__bam_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_split_verify, DB___bam_split)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_rsplit_verify, DB___bam_rsplit)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_adj_verify, DB___bam_adj)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_cadjust_verify, DB___bam_cadjust)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_cdel_verify, DB___bam_cdel)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_repl_verify, DB___bam_repl)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_root_verify, DB___bam_root)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_curadj_verify, DB___bam_curadj)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_rcuradj_verify, DB___bam_rcuradj)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __bam_irep_verify, DB___bam_irep)) != 0)
+ return (ret);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __fop_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__fop_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __fop_create_verify, DB___fop_create)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __fop_remove_verify, DB___fop_remove)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __fop_write_verify, DB___fop_write)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __fop_rename_verify, DB___fop_rename)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __fop_rename_verify, DB___fop_rename_noundo)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __fop_file_remove_verify, DB___fop_file_remove)) != 0)
+ return (ret);
+ return (0);
+}
+
+#ifdef HAVE_HASH
+/*
+ * PUBLIC: int __ham_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__ham_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_insdel_verify, DB___ham_insdel)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_newpage_verify, DB___ham_newpage)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_splitdata_verify, DB___ham_splitdata)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_replace_verify, DB___ham_replace)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_copypage_verify, DB___ham_copypage)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_metagroup_verify, DB___ham_metagroup)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_groupalloc_verify, DB___ham_groupalloc)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_changeslot_verify, DB___ham_changeslot)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_contract_verify, DB___ham_contract)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_curadj_verify, DB___ham_curadj)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __ham_chgpg_verify, DB___ham_chgpg)) != 0)
+ return (ret);
+ return (0);
+}
+
+#endif /* HAVE_HASH */
+#ifdef HAVE_HEAP
+/*
+ * PUBLIC: int __heap_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__heap_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __heap_addrem_verify, DB___heap_addrem)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __heap_pg_alloc_verify, DB___heap_pg_alloc)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __heap_trunc_meta_verify, DB___heap_trunc_meta)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __heap_trunc_page_verify, DB___heap_trunc_page)) != 0)
+ return (ret);
+ return (0);
+}
+#endif /* HAVE_HEAP */
+#ifdef HAVE_QUEUE
+/*
+ * PUBLIC: int __qam_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__qam_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __qam_incfirst_verify, DB___qam_incfirst)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __qam_mvptr_verify, DB___qam_mvptr)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __qam_del_verify, DB___qam_del)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __qam_add_verify, DB___qam_add)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __qam_delext_verify, DB___qam_delext)) != 0)
+ return (ret);
+ return (0);
+}
+
+#endif /* HAVE_QUEUE */
+/*
+ * PUBLIC: int __txn_init_verify __P((ENV *, DB_DISTAB *));
+ */
+int
+__txn_init_verify(env, dtabp)
+ ENV *env;
+ DB_DISTAB *dtabp;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __txn_regop_verify, DB___txn_regop)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __txn_ckp_verify, DB___txn_ckp)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __txn_child_verify, DB___txn_child)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __txn_prepare_verify, DB___txn_prepare)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery_int(env, dtabp,
+ __txn_recycle_verify, DB___txn_recycle)) != 0)
+ return (ret);
+ return (0);
+}
diff --git a/src/log/log_verify_int.c b/src/log/log_verify_int.c
new file mode 100644
index 00000000..abe564c6
--- /dev/null
+++ b/src/log/log_verify_int.c
@@ -0,0 +1,4353 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+/*
+ * This file contains verification functions for all types of log records,
+ * one for each type. We can't generate them automatically like the
+ * log_type_print/read functions because there is no consistent handling:
+ * each type of log record has its own way of being verified and its own
+ * information to extract.
+ *
+ * In each verification function, we first call the log_type_read function
+ * to get the log_type_args structure, then extract information according to
+ * the type of log. The log types can be grouped into categories, each
+ * of which has similar types of information.
+ *
+ * For example, the txn_regop and txn_ckp types both have timestamps, and we
+ * want to maintain a (timestamp, lsn) mapping, so we have an on_timestamp
+ * function and call it in the txn_regop_verify and txn_ckp_verify functions;
+ * those two functions may also call other on_*** functions to extract and
+ * verify other information.
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/fop.h"
+#include "dbinc/hash.h"
+#include "dbinc/heap.h"
+#include "dbinc/qam.h"
+#include "dbinc/txn.h"
+
+#include "dbinc/log_verify.h"
+
+static int __log_vrfy_proc __P((DB_LOG_VRFY_INFO *, DB_LSN, DB_LSN,
+ u_int32_t, DB_TXN *, int32_t, int *));
+static int __lv_ckp_vrfy_handler __P((DB_LOG_VRFY_INFO *,
+ VRFY_TXN_INFO *, void *));
+static const char *__lv_dbreg_str __P((u_int32_t));
+static int __lv_dbregid_to_dbtype __P((DB_LOG_VRFY_INFO *, int32_t, DBTYPE *));
+static int __lv_dbt_str __P((const DBT *, char **));
+static const char *__lv_dbtype_str __P((DBTYPE));
+static u_int32_t __lv_first_offset __P((ENV *));
+static int __lv_new_logfile_vrfy __P((DB_LOG_VRFY_INFO *, const DB_LSN *));
+static int __lv_log_fwdscr_oncmt __P((DB_LOG_VRFY_INFO *, DB_LSN,
+ u_int32_t, u_int32_t, int32_t));
+static int __lv_log_fwdscr_onrec __P((DB_LOG_VRFY_INFO *,
+ u_int32_t, u_int32_t, DB_LSN, DB_LSN));
+static int __lv_log_mismatch __P((DB_LOG_VRFY_INFO *, DB_LSN, DBTYPE, DBTYPE));
+static int __lv_on_bam_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t));
+static int __lv_on_ham_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t));
+static int __lv_on_heap_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t));
+static int __lv_on_new_txn __P((DB_LOG_VRFY_INFO *, const DB_LSN *,
+ const DB_TXN *, u_int32_t, int32_t, const DBT *));
+static int __lv_on_nontxn_update __P((DB_LOG_VRFY_INFO *, const DB_LSN *,
+ u_int32_t, u_int32_t, int32_t));
+static int __lv_on_page_update __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t,
+ db_pgno_t, DB_TXN *, int *));
+static int __lv_on_qam_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t));
+static int __lv_on_timestamp __P((DB_LOG_VRFY_INFO *, const DB_LSN *,
+ int32_t, u_int32_t));
+static int __lv_on_txn_aborted __P((DB_LOG_VRFY_INFO *));
+static int __lv_on_txn_logrec __P((DB_LOG_VRFY_INFO *, const DB_LSN *,
+ const DB_LSN *, const DB_TXN *, u_int32_t, int32_t));
+static int __lv_vrfy_for_dbfile __P((DB_LOG_VRFY_INFO *, int32_t, int *));
+
+/* General error handlers, called when a check fails. */
+#define ON_ERROR(lvh, errv) do { \
+ (lvh)->flags |= (errv); \
+ if (F_ISSET((lvh), DB_LOG_VERIFY_CAF)) \
+ ret = 0;/* Ignore the error and continue. */ \
+ goto err; \
+} while (0)
+
+/* Used by logs of unsupported types. */
+#define ON_NOT_SUPPORTED(env, lvh, lsn, ltype) do { \
+ __db_errx((env), DB_STR_A("2536", \
+ "[%lu][%lu] Not supported type of log record %u.", \
+ "%lu %lu %u"), (u_long)((lsn).file), (u_long)((lsn).offset),\
+ (ltype)); \
+ (lvh)->unknown_logrec_cnt++; \
+ goto err; \
+} while (0)
+
+#define SKIP_FORWARD_CHK(type) ((type) != DB___txn_regop && \
+ (type) != DB___txn_ckp && (type) != DB___fop_rename && \
+ (type) != DB___txn_child)
+
+#define NOTCOMMIT(type) ((type) != DB___txn_regop && \
+ (type) != DB___txn_child)
+
+#define LOG_VRFY_PROC(lvh, lsn, argp, fileid) do { \
+ int __lv_log_vrfy_proc_step = 0; \
+ if ((ret = __log_vrfy_proc((lvh), (lsn), (argp)->prev_lsn, \
+ (argp)->type, (argp)->txnp, (fileid), \
+ &__lv_log_vrfy_proc_step)) != 0) \
+ goto err; \
+ if (__lv_log_vrfy_proc_step == 1) \
+ goto out; \
+ else if (__lv_log_vrfy_proc_step == -1) \
+ goto err; \
+ else \
+ DB_ASSERT(lvh->dbenv->env, \
+ __lv_log_vrfy_proc_step == 0); \
+} while (0)
+
+/* Log record handlers used by log types involving page updates. */
+#define ON_PAGE_UPDATE(lvh, lsn, argp, pgno) do { \
+ int __lv_onpgupdate_res; \
+ if ((ret = __lv_on_page_update((lvh), (lsn), (argp)->fileid, \
+ (pgno), (argp)->txnp, &__lv_onpgupdate_res)) != 0) \
+ goto err; \
+ if (__lv_onpgupdate_res == 1) \
+ goto out; \
+ else if (__lv_onpgupdate_res == -1) \
+ goto err; \
+ else \
+ DB_ASSERT(lvh->dbenv->env, __lv_onpgupdate_res == 0); \
+} while (0)
+
+/*
+ * __lv_on_page_update --
+ *	Register that txnp updated page pgno of file fileid and warn when
+ *	the page is already associated with another transaction.
+ *
+ *	*step tells the caller (the ON_PAGE_UPDATE macro) what to do next:
+ *	0 to carry on, -1 to jump to its error label.  It is always stored,
+ *	including when an internal failure is turned into a 0 return because
+ *	DB_LOG_VERIFY_CAF is set; the caller reads it whenever this function
+ *	returns 0.
+ *
+ *	Returns 0, or the error of the helper that failed.
+ */
+static int
+__lv_on_page_update(lvh, lsn, fileid, pgno, txnp, step)
+	DB_LOG_VRFY_INFO *lvh;
+	DB_LSN lsn;
+	int32_t fileid;
+	db_pgno_t pgno;
+	DB_TXN *txnp;
+	int *step;
+{
+	u_int32_t otxn, txnid;
+	int res, ret;
+
+	txnid = txnp->txnid;
+	res = ret = 0;
+	/* Default for every non-error exit: the caller simply continues. */
+	*step = 0;
+
+	if ((ret = __add_page_to_txn(lvh, fileid, pgno,
+	    txnid, &otxn, &res)) != 0)
+		ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+	if (res != -1)	/* No access violation, we are done. */
+		goto out;
+	/*
+	 * It's OK for a child txn to update its parent's page, but not OK
+	 * for a parent txn to update its active child's pages. We can't
+	 * detect the child's abort, so we may false alarm that a parent txn
+	 * is updating its child's pages.
+	 */
+	if ((ret = __is_ancestor_txn(lvh, otxn, txnid, lsn, &res)) != 0)
+		ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+	if (res)	/* The txnid is updating its parent otxn's pages. */
+		goto out;
+	if ((ret = __is_ancestor_txn(lvh, txnid, otxn, lsn, &res)) != 0)
+		ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+	if (res) {/* The txnid is updating its active child otxn's pages. */
+		__db_errx(lvh->dbenv->env, DB_STR_A("2537",
+		    "[%lu][%lu] [WARNING] Parent txn %lx is updating its "
+		    "active child txn %lx's pages, or %lx aborted.",
+		    "%lu %lu %lx %lx %lx"), (u_long)lsn.file,
+		    (u_long)lsn.offset, (u_long)txnid,
+		    (u_long)otxn, (u_long)otxn);
+		goto out;
+	}
+	/*
+	 * It's likely that the two txns are parent-child and the child
+	 * aborted, but from the log we can't figure out this fact.
+	 */
+	__db_errx(lvh->dbenv->env, DB_STR_A("2538",
+	    "[%lu][%lu] [WARNING] Txn %lx is updating txn %lx's pages.",
+	    "%lu %lu %lx %lx"), (u_long)lsn.file, (u_long)lsn.offset,
+	    (u_long)txnid, (u_long)otxn);
+out:
+	return (ret);
+err:
+	/*
+	 * ON_ERROR lands here, possibly with ret reset to 0 when continuing
+	 * after failures; tell the caller to take its error exit, as
+	 * __log_vrfy_proc does.
+	 */
+	*step = -1;
+	return (ret);
+}
+
+/*
+ * This macro is put in all types of verify functions where a db file is
+ * updated, but no page number/lock involved.
+ */
+#define ON_PAGE_UPDATE4
+
+/*
+ * __log_vrfy_proc --
+ * General log record handler used by all log verify functions (through
+ * the LOG_VRFY_PROC macro).
+ *
+ * lsn/prev_lsn are the record's LSN and the prev_lsn stored in it, type is
+ * the log record type, txnp supplies the txnid and fileid is the dbreg id
+ * the record refers to.
+ *
+ * On return *step tells the calling macro how to proceed: 0 to continue
+ * with the rest of the verify function, 1 to jump to its "out" label and
+ * -1 to jump to its "err" label.  The return value is 0 or an error; when
+ * ON_ERROR fires with DB_LOG_VERIFY_CAF set the return value is 0 while
+ * *step is still -1.
+ */
+static int
+__log_vrfy_proc(lvh, lsn, prev_lsn, type, txnp, fileid, step)
+ DB_LOG_VRFY_INFO *lvh;
+ DB_LSN lsn, prev_lsn;
+ u_int32_t type; /* Log record type. */
+ DB_TXN *txnp;
+ int32_t fileid;
+ int *step;
+{
+ int dovrfy, ret;
+
+ dovrfy = 1;
+ ret = 0;
+ /*
+ * step is used to tell if go on with the rest of the caller, or
+ * goto err/out.
+ * 0: go on after this function; 1: goto out; -1: goto err.
+ */
+ *step = 0;
+
+ if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) {
+ /* Commits are not abort/beginnings. */
+ if (NOTCOMMIT(type) && ((ret = __lv_log_fwdscr_onrec(
+ lvh, txnp->txnid, type, prev_lsn, lsn)) != 0))
+ goto err;
+ /*
+ * Only the record types excluded by SKIP_FORWARD_CHK get any
+ * further processing by the caller during this pass.
+ */
+ if (SKIP_FORWARD_CHK(type))
+ goto out;
+ } else {/* Verifying */
+ if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE))
+ __db_errx(lvh->dbenv->env, DB_STR_A("2539",
+ "[%lu][%lu] Verifying log record of type %s",
+ "%lu %lu %s"), (u_long)lsn.file,
+ (u_long)lsn.offset, LOGTYPE_NAME(lvh, type));
+ /*
+ * If verifying a log range and we've passed the initial part
+ * which may have partial txns, remove the PARTIAL bit.
+ */
+ if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) &&
+ LOG_COMPARE(&lsn, &(lvh->valid_lsn)) >= 0) {
+ lvh->valid_lsn.offset = lvh->valid_lsn.file = 0;
+ F_CLR(lvh, DB_LOG_VERIFY_PARTIAL);
+ }
+
+ if ((ret = __lv_new_logfile_vrfy(lvh, &lsn)) != 0)
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ /* If only verify a db file, ignore logs about other dbs. */
+ if (F_ISSET(lvh, DB_LOG_VERIFY_DBFILE) && fileid !=
+ INVAL_DBREGID && (ret = __lv_vrfy_for_dbfile(lvh,
+ fileid, &dovrfy)) != 0)
+ goto err;
+ if (!dovrfy)
+ goto out;
+ /* Handle a previously noted aborted txn before this record. */
+ if (lvh->aborted_txnid != 0 &&
+ ((ret = __lv_on_txn_aborted(lvh)) != 0))
+ goto err;
+ if ((ret = __get_aborttxn(lvh, lsn)) != 0)
+ goto err;
+ if (txnp->txnid >= TXN_MINIMUM) {
+ if ((ret = __lv_on_txn_logrec(lvh, &lsn, &(prev_lsn),
+ txnp, type, fileid)) != 0)
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ } else {/* Non-txnal updates. */
+ if ((ret = __lv_on_nontxn_update(lvh, &lsn,
+ txnp->txnid, type, fileid)) != 0)
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+ }
+ /*
+ * The two blocks below are entered only through their labels; falling
+ * off the end of the code above leaves *step at 0.
+ */
+ if (0) {
+out:
+ *step = 1;
+ }
+ if (0) {
+err:
+ *step = -1;
+ }
+ return (ret);
+}
+
+/* Log record handlers used by log types for each access method. */
+/*
+ * __lv_on_bam_log --
+ *	Check that a btree log record refers to a database whose registered
+ *	type is DB_BTREE, DB_RECNO or DB_HASH; any other type is reported
+ *	through __lv_log_mismatch.  DB_NOTFOUND is forgiven while
+ *	DB_LOG_VERIFY_PARTIAL is set.
+ */
+static int
+__lv_on_bam_log(lvh, lsn, fileid)
+	DB_LOG_VRFY_INFO *lvh;
+	DB_LSN lsn;
+	int32_t fileid;
+{
+	DBTYPE dbtype;
+	int ret;
+
+	ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype);
+	if (ret == 0) {
+		switch (dbtype) {
+		case DB_BTREE:
+		case DB_RECNO:
+		case DB_HASH:
+			break;
+		default:
+			ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_BTREE);
+			break;
+		}
+	}
+
+	if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) && ret == DB_NOTFOUND)
+		ret = 0;
+	return (ret);
+}
+
+/*
+ * __lv_on_ham_log --
+ *	Check that a hash log record refers to a database whose registered
+ *	type is DB_HASH; any other type is reported through
+ *	__lv_log_mismatch.  DB_NOTFOUND is forgiven while
+ *	DB_LOG_VERIFY_PARTIAL is set.
+ */
+static int
+__lv_on_ham_log(lvh, lsn, fileid)
+	DB_LOG_VRFY_INFO *lvh;
+	DB_LSN lsn;
+	int32_t fileid;
+{
+	DBTYPE dbtype;
+	int ret;
+
+	ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype);
+	if (ret == 0 && dbtype != DB_HASH)
+		ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_HASH);
+
+	if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) && ret == DB_NOTFOUND)
+		ret = 0;
+	return (ret);
+}
+
+/*
+ * __lv_on_heap_log --
+ *	Check that a heap log record refers to a database whose registered
+ *	type is DB_HEAP; any other type is reported through
+ *	__lv_log_mismatch.  DB_NOTFOUND is forgiven while
+ *	DB_LOG_VERIFY_PARTIAL is set.
+ */
+static int
+__lv_on_heap_log(lvh, lsn, fileid)
+	DB_LOG_VRFY_INFO *lvh;
+	DB_LSN lsn;
+	int32_t fileid;
+{
+	DBTYPE dbtype;
+	int ret;
+
+	ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype);
+	if (ret == 0 && dbtype != DB_HEAP)
+		ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_HEAP);
+
+	if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) && ret == DB_NOTFOUND)
+		ret = 0;
+	return (ret);
+}
+
+/*
+ * __lv_on_qam_log --
+ *	Check that a queue log record refers to a database whose registered
+ *	type is DB_QUEUE; any other type is reported through
+ *	__lv_log_mismatch.  DB_NOTFOUND is forgiven while
+ *	DB_LOG_VERIFY_PARTIAL is set.
+ */
+static int
+__lv_on_qam_log(lvh, lsn, fileid)
+	DB_LOG_VRFY_INFO *lvh;
+	DB_LSN lsn;
+	int32_t fileid;
+{
+	DBTYPE dbtype;
+	int ret;
+
+	ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype);
+	if (ret == 0 && dbtype != DB_QUEUE)
+		ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_QUEUE);
+
+	if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) && ret == DB_NOTFOUND)
+		ret = 0;
+	return (ret);
+}
+
+/*
+ * __lv_log_fwdscr_oncmt --
+ *	Store a commit in the lvinfo->txnrngs database: the record is keyed
+ *	by txnid and holds the txnid, its parent's id, the commit LSN (as the
+ *	range end) and the commit timestamp.  Returns the result of the put.
+ */
+static int
+__lv_log_fwdscr_oncmt(lvinfo, lsn, txnid, ptxnid, timestamp)
+	DB_LOG_VRFY_INFO *lvinfo;
+	DB_LSN lsn;
+	u_int32_t txnid, ptxnid;
+	int32_t timestamp;
+{
+	DBT key, data;
+	struct __lv_txnrange tr;
+
+	/* Build the range record; fields not set here stay zero. */
+	memset(&tr, 0, sizeof(tr));
+	tr.txnid = txnid;
+	tr.ptxnid = ptxnid;
+	tr.end = lsn;
+	tr.when_commit = timestamp;
+
+	memset(&key, 0, sizeof(DBT));
+	key.data = &(txnid);
+	key.size = sizeof(txnid);
+
+	memset(&data, 0, sizeof(DBT));
+	data.data = &tr;
+	data.size = sizeof(tr);
+
+	return (__db_put(lvinfo->txnrngs, lvinfo->ip, NULL,
+	    &key, &data, 0));
+}
+
+/*
+ * __lv_log_fwdscr_onrec --
+ * Catch aborts and txn beginnings and store into lvinfo->txnrngs database.
+ *
+ * Called for records that are not commits (see NOTCOMMIT).  txnid is the
+ * record's transaction id, prevlsn the prev_lsn stored in the record and
+ * lsn the record's own LSN.  Records with a txnid below TXN_MINIMUM are
+ * ignored.
+ *
+ * Returns 0 or the error of the database/cursor call that failed; a
+ * cursor close error is returned only when nothing failed before it.
+ */
+static int
+__lv_log_fwdscr_onrec(lvinfo, txnid, lrtype, prevlsn, lsn)
+ DB_LOG_VRFY_INFO *lvinfo;
+ u_int32_t txnid, lrtype;
+ DB_LSN prevlsn, lsn;
+{
+ int doput, ret, ret2, tret;
+ u_int32_t putflag;
+ struct __lv_txnrange tr, *ptr;
+ DBC *csr;
+ DBT key, key2, data, data2;
+
+ /* Ignore non-txnal log records. */
+ if (txnid < TXN_MINIMUM)
+ return (0);
+
+ /* Not used for now, but may be used later. Pass lint checks. */
+ COMPQUIET(lrtype ,0);
+ putflag = 0;
+ doput = ret = ret2 = 0;
+ csr = NULL;
+ memset(&tr, 0, sizeof(tr));
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ memset(&key2, 0, sizeof(DBT));
+ memset(&data2, 0, sizeof(DBT));
+ key.data = &txnid;
+ key.size = sizeof(txnid);
+ tr.txnid = txnid;
+ tr.when_commit = 0;/* This is not a __txn_regop record. */
+
+ if ((ret = __db_cursor(lvinfo->txnrngs, lvinfo->ip,
+ NULL, &csr, 0)) != 0)
+ goto err;
+ /*
+ * If the txnid is first seen here or reused later, it's aborted
+ * after this log record; if this log record is the 1st one of a txn,
+ * we have the beginning of the txn; otherwise the log record is one
+ * of the actions taken within the txn, and we don't do anything.
+ */
+ if ((ret = __dbc_get(csr, &key, &data, DB_SET)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+
+ /*
+ * ptr is only dereferenced when the lookup succeeded: the
+ * DB_NOTFOUND test below short-circuits the access to ptr->begin.
+ */
+ ptr = (struct __lv_txnrange *)data.data;
+ if (ret == DB_NOTFOUND || !IS_ZERO_LSN(ptr->begin)) {
+ /*
+ * No open range for this txnid: start a new range record ending
+ * at this LSN and note (lsn -> txnid) in the txnaborts database.
+ */
+ tr.end = lsn;
+ data.data = &tr;
+ data.size = sizeof(tr);
+ doput = 1;
+ key2.data = &lsn;
+ key2.size = sizeof(lsn);
+ data2.data = &(tr.txnid);
+ data2.size = sizeof(tr.txnid);
+ putflag = DB_KEYFIRST;
+ if ((ret2 = __db_put(lvinfo->txnaborts, lvinfo->ip, NULL,
+ &key2, &data2, 0)) != 0) {
+ ret = ret2;
+ goto err;
+ }
+ /*
+ * ret may still hold DB_NOTFOUND here; it is overwritten by the
+ * cursor put below because doput is set.
+ */
+ } else if (ret == 0 && IS_ZERO_LSN(prevlsn)) {/* The beginning of txn.*/
+ /* The begin field must be [0, 0]. */
+ DB_ASSERT(lvinfo->dbenv->env, IS_ZERO_LSN(ptr->begin));
+ /* Update the record the cursor returned and write it back. */
+ ptr->begin = lsn;
+ putflag = DB_CURRENT;
+ doput = 1;
+ }
+
+ if (doput && (ret = __dbc_put(csr, &key, &data, putflag)) != 0)
+ goto err;
+err:
+ if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0)
+ ret = tret;
+
+ return (ret);
+}
+
+/*
+ * __lv_vrfy_for_dbfile --
+ *	Decide whether log records for dbreg id "fileid" should be verified
+ *	when verification is limited to the database file identified by
+ *	lvh->target_dbid.
+ *
+ *	*dovrfy is set to 1 if fileid is one of the dbreg ids recorded for
+ *	the target file, and to 0 otherwise (including when the target file
+ *	has not been seen yet, which is not an error).  The return value is 0
+ *	on success or the error from a failed DB operation.
+ */
+static int
+__lv_vrfy_for_dbfile(lvh, fileid, dovrfy)
+	DB_LOG_VRFY_INFO *lvh;
+	int32_t fileid;
+	int *dovrfy;
+{
+	VRFY_FILEREG_INFO *fregp;
+	DBT tgtkey;
+	u_int32_t i;
+	int ret, tret;
+
+	*dovrfy = 0;
+	fregp = NULL;
+	memset(&tgtkey, 0, sizeof(tgtkey));
+	tgtkey.data = lvh->target_dbid;
+	tgtkey.size = DB_FILE_ID_LEN;
+
+	ret = __get_filereg_info(lvh, &tgtkey, &fregp);
+	if (ret == DB_NOTFOUND) {
+		/*
+		 * If the target db file is not seen yet, we don't verify any
+		 * file, and it does not mean anything is wrong.
+		 */
+		ret = 0;
+	} else if (ret == 0) {
+		for (i = 0; i < fregp->regcnt; i++)
+			if (fregp->dbregids[i] == fileid) {
+				*dovrfy = 1;
+				break;
+			}
+	}
+
+	/* Release the lookup result, keeping the first error seen. */
+	if (fregp != NULL &&
+	    (tret = __free_filereg_info(fregp)) != 0 && ret == 0)
+		ret = tret;
+
+	return (ret);
+}
+
+/*
+ * __lv_log_mismatch --
+ * Report that a log record's type does not match the type of the
+ * database it refers to. Emits message 2540 with the record's LSN, the
+ * current database type (dbtype) and the type implied by the log record
+ * (exp_dbtype), sets ret to DB_LOG_VERIFY_BAD, invokes ON_ERROR and
+ * returns ret.
+ */
+static int
+__lv_log_mismatch(lvh, lsn, dbtype, exp_dbtype)
+ DB_LOG_VRFY_INFO *lvh;
+ DB_LSN lsn;
+ DBTYPE dbtype, exp_dbtype;
+{
+ int ret;
+
+ __db_errx(lvh->dbenv->env, DB_STR_A("2540",
+ "[%lu][%lu] Log record type does not match related database type, "
+ "current database type: %s, expected database type according to "
+ "the log record type: %s.", "%lu %lu %s %s"),
+ (u_long)lsn.file, (u_long)lsn.offset, __lv_dbtype_str(dbtype),
+ __lv_dbtype_str(exp_dbtype));
+ ret = DB_LOG_VERIFY_BAD;
+ /*
+ * NOTE(review): ON_ERROR is defined elsewhere; the err label below is
+ * not referenced directly, so the macro presumably uses it and may
+ * also adjust ret -- confirm.
+ */
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+err:
+ return (ret);
+}
+
+/*
+ * __lv_dbregid_to_dbtype --
+ *	Look up the file-life record for dbreg id "id" and store its database
+ *	type in *ptype.  Returns the result of __get_filelife; *ptype is only
+ *	written when that call succeeds.
+ */
+static int
+__lv_dbregid_to_dbtype(lvh, id, ptype)
+	DB_LOG_VRFY_INFO *lvh;
+	int32_t id;
+	DBTYPE *ptype;
+{
+	VRFY_FILELIFE *lifep;
+	int ret;
+
+	lifep = NULL;
+	ret = __get_filelife(lvh, id, &lifep);
+	if (ret == 0)
+		*ptype = lifep->dbtype;
+
+	/* The record, if one was handed back, is ours to free. */
+	if (lifep != NULL)
+		__os_free(lvh->dbenv->env, lifep);
+
+	return (ret);
+}
+
+/*
+ * __db_log_verify_global_report --
+ *	Print the counters accumulated in a DB_LOG_VRFY_INFO handle:
+ *	transaction, checkpoint and log record totals, followed by one line
+ *	per log record type that has a name.
+ *
+ * PUBLIC: void __db_log_verify_global_report __P((const DB_LOG_VRFY_INFO *));
+ */
+void
+__db_log_verify_global_report(lvinfo)
+	const DB_LOG_VRFY_INFO *lvinfo;
+{
+	ENV *env;
+	u_int32_t cnt, t;
+
+	env = lvinfo->dbenv->env;
+
+	/* Transaction counters. */
+	__db_msg(env,
+	    "Number of active transactions: %u;", lvinfo->ntxn_active);
+	__db_msg(env,
+	    "Number of committed transactions: %u;", lvinfo->ntxn_commit);
+	__db_msg(env,
+	    "Number of aborted transactions: %u;", lvinfo->ntxn_abort);
+	__db_msg(env,
+	    "Number of prepared transactions: %u;", lvinfo->ntxn_prep);
+
+	/* Checkpoint and miscellaneous record counters. */
+	__db_msg(env,
+	    "Total number of checkpoint: %u;", lvinfo->nckp);
+	__db_msg(env,
+	    "Total number of non-transactional updates: %u;",
+	    lvinfo->non_txnup_cnt);
+	__db_msg(env,
+	    "Total number of unknown log records: %u;",
+	    lvinfo->unknown_logrec_cnt);
+	__db_msg(env,
+	    "Total number of app-specific log record: %u;",
+	    lvinfo->external_logrec_cnt);
+
+	/* Per-type counters; types without a name are not listed. */
+	__db_msg(env,
+	    "The number of each type of log record:");
+	for (t = 0; t < 256; t++) {
+		if (LOGTYPE_NAME(lvinfo, t) == NULL)
+			continue;
+		cnt = lvinfo->lrtypes[t];
+		__db_msg(env, "\n\t%s : %u;",
+		    LOGTYPE_NAME(lvinfo, t), cnt);
+	}
+}
+
+/*
+ * __crdel_metasub_verify --
+ * Log-verification handler for __crdel_metasub records: decode the
+ * record, invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with
+ * its pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __crdel_metasub_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__crdel_metasub_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __crdel_metasub_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __crdel_metasub_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __crdel_inmem_create_verify --
+ * Log-verification handler for __crdel_inmem_create records: decode the
+ * record, invoke LOG_VRFY_PROC with its fileid, then free the decoded
+ * arguments. A decoding failure is returned immediately; otherwise ret
+ * is returned.
+ *
+ * PUBLIC: int __crdel_inmem_create_verify __P((ENV *, DBT *,
+ * PUBLIC: DB_LSN *, db_recops, void *));
+ */
+int
+__crdel_inmem_create_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __crdel_inmem_create_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __crdel_inmem_create_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macro above jumps to them and sets ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __crdel_inmem_rename_verify --
+ * Log-verification handler for __crdel_inmem_rename records: decode the
+ * record, invoke LOG_VRFY_PROC with INVAL_DBREGID in place of a file
+ * id, then free the decoded arguments. A decoding failure is returned
+ * immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __crdel_inmem_rename_verify __P((ENV *, DBT *,
+ * PUBLIC: DB_LSN *, db_recops, void *));
+ */
+int
+__crdel_inmem_rename_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __crdel_inmem_rename_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __crdel_inmem_rename_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macro above jumps to them and sets ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __crdel_inmem_remove_verify --
+ * Log-verification handler for __crdel_inmem_remove records: decode the
+ * record, invoke LOG_VRFY_PROC with INVAL_DBREGID in place of a file
+ * id, then free the decoded arguments. A decoding failure is returned
+ * immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __crdel_inmem_remove_verify __P((ENV *, DBT *,
+ * PUBLIC: DB_LSN *, db_recops, void *));
+ */
+int
+__crdel_inmem_remove_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __crdel_inmem_remove_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __crdel_inmem_remove_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macro above jumps to them and sets ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_addrem_verify --
+ * Log-verification handler for __db_addrem records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_addrem_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_addrem_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_addrem_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_big_verify --
+ * Log-verification handler for __db_big records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_big_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_big_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_big_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_big_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_ovref_verify --
+ * Log-verification handler for __db_ovref records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_ovref_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_ovref_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_ovref_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_ovref_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_relink_42_verify --
+ * Log-verification handler for __db_relink_42 records. After decoding,
+ * the only processing is ON_NOT_SUPPORTED with the record type (the
+ * LOG_VRFY_PROC call is commented out); the decoded arguments are then
+ * freed. A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __db_relink_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_relink_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_relink_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_relink_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): err is not referenced directly; presumably
+ * ON_NOT_SUPPORTED jumps to it and sets ret -- confirm.
+ */
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_debug_verify --
+ * Log-verification handler for __db_debug records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid, then free the decoded
+ * arguments. A decoding failure is returned immediately; otherwise ret
+ * is returned.
+ *
+ * PUBLIC: int __db_debug_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_debug_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_debug_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __db_debug_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macro above jumps to them and sets ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_noop_verify --
+ * Log-verification handler for __db_noop records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid, then free the decoded
+ * arguments. A decoding failure is returned immediately; otherwise ret
+ * is returned.
+ *
+ * PUBLIC: int __db_noop_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_noop_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_noop_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_noop_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macro above jumps to them and sets ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_alloc_42_verify --
+ * Log-verification handler for __db_pg_alloc_42 records. After
+ * decoding, the only processing is ON_NOT_SUPPORTED with the record
+ * type (the LOG_VRFY_PROC call is commented out); the decoded arguments
+ * are then freed. A decoding failure is returned immediately; otherwise
+ * ret is returned.
+ *
+ * PUBLIC: int __db_pg_alloc_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_alloc_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_alloc_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_alloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): err is not referenced directly; presumably
+ * ON_NOT_SUPPORTED jumps to it and sets ret -- confirm.
+ */
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_alloc_verify --
+ * Log-verification handler for __db_pg_alloc records: decode the
+ * record, invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with
+ * its pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_pg_alloc_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_alloc_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_free_42_verify --
+ * Log-verification handler for __db_pg_free_42 records. After decoding,
+ * the only processing is ON_NOT_SUPPORTED with the record type (the
+ * LOG_VRFY_PROC call is commented out); the decoded arguments are then
+ * freed. A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __db_pg_free_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_free_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_free_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_free_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): err is not referenced directly; presumably
+ * ON_NOT_SUPPORTED jumps to it and sets ret -- confirm.
+ */
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_free_verify --
+ * Log-verification handler for __db_pg_free records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_pg_free_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_free_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_free_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_free_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_cksum_verify --
+ * Log-verification handler for __db_cksum records: decode the record,
+ * invoke LOG_VRFY_PROC with INVAL_DBREGID in place of a file id, then
+ * free the decoded arguments. A decoding failure is returned
+ * immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_cksum_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_cksum_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_cksum_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macro above jumps to them and sets ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_freedata_42_verify --
+ * Log-verification handler for __db_pg_freedata_42 records. After
+ * decoding, the only processing is ON_NOT_SUPPORTED with the record
+ * type (the LOG_VRFY_PROC call is commented out); the decoded arguments
+ * are then freed. A decoding failure is returned immediately; otherwise
+ * ret is returned.
+ *
+ * PUBLIC: int __db_pg_freedata_42_verify __P((ENV *, DBT *,
+ * PUBLIC: DB_LSN *, db_recops, void *));
+ */
+int
+__db_pg_freedata_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_freedata_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_freedata_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): err is not referenced directly; presumably
+ * ON_NOT_SUPPORTED jumps to it and sets ret -- confirm.
+ */
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_freedata_verify --
+ * Log-verification handler for __db_pg_freedata records: decode the
+ * record, invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with
+ * its pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_pg_freedata_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_freedata_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_freedata_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_freedata_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_init_verify --
+ * Log-verification handler for __db_pg_init records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_pg_init_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_init_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_init_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_init_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_sort_44_verify --
+ * Log-verification handler for __db_pg_sort_44 records. After decoding,
+ * the only processing is ON_NOT_SUPPORTED with the record type (the
+ * LOG_VRFY_PROC call is commented out); the decoded arguments are then
+ * freed. A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __db_pg_sort_44_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_sort_44_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_sort_44_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_sort_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): err is not referenced directly; presumably
+ * ON_NOT_SUPPORTED jumps to it and sets ret -- confirm.
+ */
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pg_trunc_verify --
+ * Log-verification handler for __db_pg_trunc records: decode the
+ * record, invoke LOG_VRFY_PROC with its fileid, expand the
+ * argument-less ON_PAGE_UPDATE4 macro, then free the decoded arguments.
+ * A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __db_pg_trunc_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pg_trunc_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pg_trunc_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pg_trunc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ /*
+ * NOTE(review): ON_PAGE_UPDATE4 takes no arguments here, so it
+ * presumably relies on the local names in scope -- confirm.
+ */
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+out:
+err:
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_realloc_verify --
+ * Log-verification handler for __db_realloc records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid, expand the argument-less
+ * ON_PAGE_UPDATE4 macro, then free the decoded arguments. A decoding
+ * failure is returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_realloc_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_realloc_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_realloc_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_realloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ /*
+ * NOTE(review): ON_PAGE_UPDATE4 takes no arguments here, so it
+ * presumably relies on the local names in scope -- confirm.
+ */
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_relink_verify --
+ * Log-verification handler for __db_relink records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_relink_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_relink_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_relink_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_relink_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_merge_verify --
+ * Log-verification handler for __db_merge records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_merge_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_merge_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_merge_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_merge_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __db_pgno_verify --
+ * Log-verification handler for __db_pgno records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid and ON_PAGE_UPDATE with its
+ * pgno, then free the decoded arguments. A decoding failure is
+ * returned immediately; otherwise ret is returned.
+ *
+ * PUBLIC: int __db_pgno_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__db_pgno_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __db_pgno_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __db_pgno_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+
+ /*
+ * NOTE(review): out/err are not referenced directly; presumably the
+ * macros above jump to them and set ret -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __lv_dbreg_str --
+ *	Map a dbreg operation code to its symbolic name for use in
+ *	diagnostic messages.  Codes without a case below yield the
+ *	"Unknown dbreg op code" string.
+ */
+static const char *
+__lv_dbreg_str(op)
+	u_int32_t op;
+{
+	switch (op) {
+	case DBREG_CHKPNT:
+		return ("DBREG_CHKPNT");
+	case DBREG_RCLOSE:
+		return ("DBREG_RCLOSE");
+	case DBREG_CLOSE:
+		return ("DBREG_CLOSE");
+	case DBREG_OPEN:
+		return ("DBREG_OPEN");
+	case DBREG_PREOPEN:
+		return ("DBREG_PREOPEN");
+	case DBREG_REOPEN:
+		return ("DBREG_REOPEN");
+	case DBREG_XCHKPNT:
+		return ("DBREG_XCHKPNT");
+	case DBREG_XOPEN:
+		return ("DBREG_XOPEN");
+	case DBREG_XREOPEN:
+		return ("DBREG_XREOPEN");
+	default:
+		break;
+	}
+
+	return (DB_STR_P("Unknown dbreg op code"));
+}
+
+/*
+ * __lv_dbt_str --
+ *	Render the bytes of a DBT as a newly allocated, NUL-terminated
+ *	string.  Printable bytes and newlines (0x0a) are copied as
+ *	characters; every other byte is written in hexadecimal.
+ *
+ *	On success 0 is returned and *str points to the buffer, which the
+ *	caller releases with __os_free.  If the allocation fails its error is
+ *	returned and *str is not modified.
+ */
+static int
+__lv_dbt_str(dbt, str)
+	const DBT *dbt;
+	char **str;
+{
+	const unsigned char *src;
+	char *buf;
+	u_int32_t bufsz, i, len;
+	int n, ret;
+
+	/*
+	 * A byte expands to at most two hex digits, so two characters per
+	 * input byte plus one for the terminating NUL always suffices.  The
+	 * extra byte also guarantees an empty DBT yields an empty string.
+	 */
+	bufsz = dbt->size * 2 + 1;
+	if ((ret = __os_malloc(NULL, bufsz, &buf)) != 0)
+		return (ret);
+	memset(buf, 0, bufsz);
+
+	/*
+	 * Walk the data as unsigned bytes: handing a negative char to
+	 * isprint() is undefined, and a sign-extended value would print as
+	 * eight hex digits rather than two.
+	 */
+	src = (const unsigned char *)dbt->data;
+	len = 0;
+	for (i = 0; i < dbt->size; i++) {
+		n = snprintf(buf + len, (size_t)(bufsz - len),
+		    isprint(src[i]) || src[i] == 0x0a ? "%c" : "%x", src[i]);
+		/* Stop on an output error or if the text would not fit. */
+		if (n < 0 || (u_int32_t)n >= bufsz - len)
+			break;
+		len += (u_int32_t)n;
+	}
+
+	*str = buf;
+	return (0);
+}
+
+/*
+ * __lv_dbtype_str --
+ *	Map a DBTYPE value to its symbolic name for use in diagnostic
+ *	messages.  DB_HEAP is included because the heap log check reports
+ *	mismatches against it.  Types without a case below yield the
+ *	"Unknown db type" string.
+ */
+static const char *
+__lv_dbtype_str(dbtype)
+	DBTYPE dbtype;
+{
+	const char *p;
+
+	switch (dbtype) {
+	case DB_BTREE:
+		p = "DB_BTREE";
+		break;
+	case DB_HASH:
+		p = "DB_HASH";
+		break;
+	case DB_HEAP:
+		p = "DB_HEAP";
+		break;
+	case DB_RECNO:
+		p = "DB_RECNO";
+		break;
+	case DB_QUEUE:
+		p = "DB_QUEUE";
+		break;
+	default:
+		p = DB_STR_P("Unknown db type");
+		break;
+	}
+
+	return (p);
+}
+
+/*
+ * __dbreg_register_verify --
+ *	Log-verification handler for __dbreg_register records.  Besides the
+ *	common per-record processing it maintains the file-register
+ *	(file uid -> name and dbreg ids) and file-life (dbreg id -> type and
+ *	last dbreg operation) information kept in the verification handle,
+ *	and checks that a file's database type does not change and that its
+ *	dbreg operations follow an open/checkpoint -> [checkpoint] -> close
+ *	order.
+ *
+ *	Returns the error from decoding the record if that fails; otherwise
+ *	the decoded arguments and any looked-up records are freed and ret is
+ *	returned.
+ *
+ * PUBLIC: int __dbreg_register_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__dbreg_register_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__dbreg_register_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	VRFY_FILEREG_INFO *fregp, freg;
+	VRFY_FILELIFE *pflife, flife;
+	int checklife, rmv_dblife, ret, ret2;
+	u_int32_t opcode;
+	char *puid;
+	const char *dbfname;
+
+	dbfname = NULL;
+	checklife = 1;
+	opcode = 0;
+	ret = ret2 = rmv_dblife = 0;
+	puid = NULL;
+	/* The caller's value is overwritten and not read afterwards. */
+	notused2 = DB_TXN_LOG_VERIFY;
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+	fregp = NULL;
+	pflife = NULL;
+	memset(&flife, 0, sizeof(flife));
+	memset(&freg, 0, sizeof(freg));
+
+	if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+
+	opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK);
+	dbfname = argp->name.size == 0 ? "(null)" : (char *)(argp->name.data);
+	/*
+	 * We don't call LOG_VRFY_PROC macro here, so we have to copy the code
+	 * snippet in __log_vrfy_proc here.
+	 */
+	if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) {
+		if ((ret = __lv_log_fwdscr_onrec(lvh, argp->txnp->txnid,
+		    argp->type, argp->prev_lsn, *lsnp)) != 0)
+			goto err;
+		goto out;
+	}
+	if (lvh->aborted_txnid != 0 && (ret = __lv_on_txn_aborted(lvh)) != 0)
+		goto err;
+
+	if ((ret = __get_filereg_info(lvh, &(argp->uid), &fregp)) != 0 &&
+	    ret != DB_NOTFOUND)
+		goto err;
+
+	/*
+	 * When DBREG_CLOSE, we should remove the fileuid-filename mapping
+	 * from filereg because the file can be opened again with a different
+	 * fileuid after closed.
+	 */
+	if (ret == 0 && IS_DBREG_CLOSE(opcode)) {
+		if ((ret = __db_del(lvh->fileregs, lvh->ip, NULL,
+		    &(argp->uid), 0)) != 0)
+			goto err;
+	}
+
+	/*
+	 * If this db file is seen for the 1st time, store filereg and
+	 * filelife info. Since we will do a end-to-begin scan before the
+	 * verification, we will be able to get the record but it's regcnt
+	 * is 0 since we didn't know any dbregid yet.
+	 */
+	if (ret == DB_NOTFOUND || fregp->regcnt == 0) {
+		/* Store filereg info unless it's a CLOSE. */
+		freg.fileid = argp->uid;
+		if (!IS_DBREG_CLOSE(opcode)) {
+			freg.regcnt = 1;
+			freg.dbregids = &(argp->fileid);
+		} else {
+			freg.regcnt = 0;
+			freg.dbregids = NULL;
+		}
+		if (ret == DB_NOTFOUND) {
+			/*
+			 * If the db file is an in-memory db file, we can
+			 * arrive here because there is no __fop_rename log
+			 * for it; if the __fop_rename log record is out of
+			 * the log range we verify, we will also arrive here.
+			 */
+			if ((ret = __os_malloc(env, argp->name.size + 1,
+			    &(freg.fname))) != 0)
+				goto err;
+			memset(freg.fname, 0,
+			    sizeof(char) * (argp->name.size + 1));
+			(void)strncpy(freg.fname,
+			    (const char *)(argp->name.data), argp->name.size);
+		} else /* We already have the name. */
+			if ((ret = __os_strdup(env,
+			    fregp->fname, &(freg.fname))) != 0)
+				goto err;
+
+		if (!IS_DBREG_OPEN(opcode) &&
+		    !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+			/* It's likely that the DBREG_OPEN is not seen.*/
+			__db_msg(env, DB_STR_A("2541",
+			    "[%lu][%lu] Suspicious dbreg operation: %s, the "
+			    "database file %s's register in log region does "
+			    "not begin with an open operation.",
+			    "%lu %lu %s %s"), (u_long)lsnp->file,
+			    (u_long)lsnp->offset,
+			    __lv_dbreg_str(opcode), dbfname);
+		}
+
+		/*
+		 * PREOPEN is only generated when opening an in-memory db.
+		 * Because we need to log the fileid we're allocating, but we
+		 * don't have all the details yet, we are preopening the
+		 * database and will actually complete the open later. So
+		 * PREOPEN is not a real open, and the log should be ignored
+		 * in log_verify.
+		 * If fileuid is in a CLOSE operation there is no need to
+		 * record it.
+		 */
+		if ((opcode != DBREG_PREOPEN) && !IS_DBREG_CLOSE(opcode) &&
+		    (ret = __put_filereg_info(lvh, &freg)) != 0)
+			goto err;
+
+		/* Store filelife info unless it's a CLOSE dbreg operation. */
+		if (!IS_DBREG_CLOSE(opcode)) {
+			flife.lifetime = opcode;
+			flife.dbregid = argp->fileid;
+			flife.lsn = *lsnp;
+			flife.dbtype = argp->ftype;
+			flife.meta_pgno = argp->meta_pgno;
+			/*
+			 * NOTE(review): this trusts argp->uid.size to fit in
+			 * flife.fileid; confirm the record reader bounds it.
+			 */
+			memcpy(flife.fileid, argp->uid.data, argp->uid.size);
+			if ((ret = __put_filelife(lvh, &flife)) != 0)
+				goto err;
+		}
+		/* on_txn_logrec relies on the freg info in db first. */
+		LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+		goto out;
+	}
+
+	/*
+	 * Add dbregid if it's new, and store the file register info; or
+	 * remove dbregid from fregp if we are closing the file.
+	 */
+	if ((ret = __add_dbregid(lvh, fregp, argp->fileid,
+	    opcode, *lsnp, argp->ftype, argp->meta_pgno, &ret2)) != 0)
+		goto err;
+	ret = ret2;
+	if (ret != 0 && ret != 1 && ret != 2 && ret != -1)
+		goto err;/* DB operation error. */
+	if (ret != 0) {
+		/* Newly seen dbregid does not need to check life. */
+		if (ret == 1)
+			checklife = 0;
+		else if (ret == -1)
+			rmv_dblife = 1;/* The dbreg file id is closed. */
+		else if (ret == 2) {
+			__db_errx(env, DB_STR_A("2542",
+			    "[%lu][%lu] Wrong dbreg operation "
+			    "sequence, opening %s for id %d which is already "
+			    "open.", "%lu %lu %s %d"),
+			    (u_long)lsnp->file, (u_long)lsnp->offset,
+			    dbfname, argp->fileid);
+			ret = DB_LOG_VERIFY_BAD;
+			ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+		}
+		if (!rmv_dblife && (ret = __put_filereg_info(lvh, fregp)) != 0)
+			goto err;
+	}
+
+	LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+	if (!checklife)
+		goto out;
+
+	/*
+	 * Verify the database type does not change, and the lifetime of a
+	 * db file follow an open/chkpnt->[chkpnt]->close order.
+	 * A VRFY_FILELIFE record is removed from db on DBREG_CLOSE,
+	 * and inserted into db on DBREG_OPEN.
+	 */
+	if (!IS_DBREG_OPEN(opcode) &&
+	    (ret = __get_filelife(lvh, argp->fileid, &pflife)) != 0) {
+		if (ret == DB_NOTFOUND) {
+			if (!F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+				__db_errx(env, DB_STR_A("2543",
+				    "[%lu][%lu] Wrong dbreg operation sequence, "
+				    "file %s with id %d is first seen of "
+				    "status: %s", "%lu %lu %s %d %s"),
+				    (u_long)lsnp->file, (u_long)lsnp->offset,
+				    dbfname, argp->fileid,
+				    __lv_dbreg_str(opcode));
+				ret = DB_LOG_VERIFY_BAD;
+				ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+			} else
+				ret = 0;
+		}
+		goto err;
+	}
+
+	/* Can't go on verifying without pflife. */
+	if (pflife == NULL)
+		goto out;
+	if (argp->ftype != pflife->dbtype) {
+		if ((ret = __lv_dbt_str(&(argp->uid), &puid)) != 0)
+			goto err;
+		__db_errx(env, DB_STR_A("2544",
+		    "[%lu][%lu] The dbtype of database file %s with uid %s "
+		    " and id %d has changed from %s to %s.",
+		    "%lu %lu %s %s %d %s %s"), (u_long)lsnp->file,
+		    (u_long)lsnp->offset, dbfname, puid,
+		    pflife->dbregid, __lv_dbtype_str(pflife->dbtype),
+		    __lv_dbtype_str(argp->ftype));
+
+		__os_free(env, puid);
+		ret = DB_LOG_VERIFY_BAD;
+		ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+	}
+
+	/*
+	 * A close is only legal after a checkpoint or an open: complain
+	 * when the previous operation was neither kind of checkpoint and
+	 * not an open either.
+	 */
+	if ((IS_DBREG_CLOSE(opcode) &&
+	    (pflife->lifetime != DBREG_CHKPNT &&
+	    pflife->lifetime != DBREG_XCHKPNT) &&
+	    !IS_DBREG_OPEN(pflife->lifetime))) {
+		__db_errx(env, DB_STR_A("2545",
+		    "[%lu][%lu] Wrong dbreg operation sequence for file %s "
+		    "with id %d, current status: %s, new status: %s",
+		    "%lu %lu %s %d %s %s"), (u_long)lsnp->file,
+		    (u_long)lsnp->offset, dbfname, pflife->dbregid,
+		    __lv_dbreg_str(pflife->lifetime),
+		    __lv_dbreg_str(opcode));
+		ret = DB_LOG_VERIFY_BAD;
+		ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+	}
+
+	/* Record the new state, or drop the record once the file is closed. */
+	pflife->lifetime = opcode;
+	pflife->lsn = *lsnp;
+	if ((!rmv_dblife && (ret = __put_filelife(lvh, pflife)) != 0) ||
+	    ((rmv_dblife || IS_DBREG_CLOSE(opcode)) &&
+	    ((ret = __del_filelife(lvh, argp->fileid)) != 0)))
+		goto err;
+
+out:
+	/* There may be something to do here in future. */
+err:
+	__os_free(env, argp);
+	if (fregp != NULL &&
+	    (ret2 = __free_filereg_info(fregp)) != 0 && ret == 0)
+		ret = ret2;
+	if (freg.fname != NULL)
+		__os_free(env, freg.fname);
+	if (pflife != NULL)
+		__os_free(env, pflife);
+
+	return (ret);
+}
+
+/*
+ * __bam_split_verify --
+ * Log-verification handler for __bam_split records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid, ON_PAGE_UPDATE for both the
+ * left and right pages, and __lv_on_bam_log for the fileid, then free
+ * the decoded arguments. A decoding failure is returned immediately;
+ * otherwise ret is returned.
+ *
+ * PUBLIC: int __bam_split_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_split_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_split_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_split_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->left);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->right);
+ /* Parent page lock is always released before __bam_page returns. */
+
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+ /*
+ * NOTE(review): out is not referenced directly; presumably the macros
+ * above jump to it -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_split_42_verify --
+ * Log-verification handler for __bam_split_42 records. After decoding,
+ * the only processing is ON_NOT_SUPPORTED with the record type (the
+ * LOG_VRFY_PROC call is commented out); the decoded arguments are then
+ * freed. A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __bam_split_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_split_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_split_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_split_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * NOTE(review): err is not referenced directly; presumably
+ * ON_NOT_SUPPORTED jumps to it and sets ret -- confirm.
+ */
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_rsplit_verify --
+ * Log-verification handler for __bam_rsplit records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid, ON_PAGE_UPDATE with its pgno
+ * and __lv_on_bam_log for the fileid, then free the decoded arguments.
+ * A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __bam_rsplit_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_rsplit_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_rsplit_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_rsplit_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+ /*
+ * NOTE(review): out is not referenced directly; presumably the macros
+ * above jump to it -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_adj_verify --
+ * Log-verification handler for __bam_adj records: decode the record,
+ * invoke LOG_VRFY_PROC with its fileid, ON_PAGE_UPDATE with its pgno
+ * and __lv_on_bam_log for the fileid, then free the decoded arguments.
+ * A decoding failure is returned immediately; otherwise ret is
+ * returned.
+ *
+ * PUBLIC: int __bam_adj_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_adj_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_adj_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ /* The caller's value is overwritten and not read afterwards. */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_adj_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+ /*
+ * NOTE(review): out is not referenced directly; presumably the macros
+ * above jump to it -- confirm.
+ */
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_irep_verify --
+ * Log verification callback for a __bam_irep record.
+ *
+ * Decodes dbtp->data with __bam_irep_read, then runs LOG_VRFY_PROC and
+ * ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_bam_log on
+ * argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_irep_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_irep_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_irep_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_irep_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_irep_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_cadjust_verify --
+ * Log verification callback for a __bam_cadjust record.
+ *
+ * Decodes dbtp->data with __bam_cadjust_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_bam_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_cadjust_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_cadjust_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_cadjust_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_cadjust_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_cadjust_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_cdel_verify --
+ * Log verification callback for a __bam_cdel record.
+ *
+ * Decodes dbtp->data with __bam_cdel_read, then runs LOG_VRFY_PROC and
+ * ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_bam_log on
+ * argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_cdel_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_cdel_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_cdel_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_cdel_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_cdel_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_repl_verify --
+ * Log verification callback for a __bam_repl record.
+ *
+ * Decodes dbtp->data with __bam_repl_read, then runs LOG_VRFY_PROC and
+ * ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_bam_log on
+ * argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_repl_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_repl_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_repl_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_repl_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_repl_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_root_verify --
+ * Log verification callback for a __bam_root record.
+ *
+ * Decodes dbtp->data with __bam_root_read, runs LOG_VRFY_PROC and then
+ * calls __lv_on_bam_log on argp->fileid. No page-update check is made
+ * here. LOG_VRFY_PROC is defined outside this function and presumably
+ * may jump to the out/err labels -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_root_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_root_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_root_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_root_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_root_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __bam_curadj_verify --
+ * Log verification callback for a __bam_curadj record.
+ *
+ * Decodes dbtp->data with __bam_curadj_read, runs LOG_VRFY_PROC and
+ * then calls __lv_on_bam_log on argp->fileid. No page-update check is
+ * made here. LOG_VRFY_PROC is defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in its
+ * definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_curadj_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_curadj_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_curadj_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_curadj_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_curadj_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __bam_rcuradj_verify --
+ * Log verification callback for a __bam_rcuradj record.
+ *
+ * Decodes dbtp->data with __bam_rcuradj_read, runs LOG_VRFY_PROC and
+ * then calls __lv_on_bam_log on argp->fileid. No page-update check is
+ * made here. LOG_VRFY_PROC is defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in its
+ * definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_rcuradj_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_rcuradj_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_rcuradj_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_rcuradj_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_rcuradj_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __bam_relink_43_verify --
+ * Log verification callback for a __bam_relink_43 record.
+ *
+ * Decodes dbtp->data with __bam_relink_43_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_relink_43_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_relink_43_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_relink_43_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_relink_43_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_relink_43_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __bam_merge_44_verify --
+ * Log verification callback for a __bam_merge_44 record.
+ *
+ * Decodes dbtp->data with __bam_merge_44_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __bam_merge_44_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __bam_merge_44_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_merge_44_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __bam_merge_44_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __bam_merge_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __fop_create_42_verify --
+ * Log verification callback for a __fop_create_42 record.
+ *
+ * Decodes dbtp->data with __fop_create_42_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_create_42_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_create_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_create_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_create_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_create_42_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __fop_create_verify --
+ * Log verification callback for a __fop_create record.
+ *
+ * Decodes dbtp->data with __fop_create_read and runs LOG_VRFY_PROC
+ * with INVAL_DBREGID as the file id; no further per-record checks are
+ * made. LOG_VRFY_PROC is defined outside this function and presumably
+ * may jump to the out/err labels -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_create_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_create_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_create_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_create_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_create_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __fop_remove_verify --
+ * Log verification callback for a __fop_remove record.
+ *
+ * Decodes dbtp->data with __fop_remove_read and runs LOG_VRFY_PROC
+ * with INVAL_DBREGID as the file id; no further per-record checks are
+ * made. LOG_VRFY_PROC is defined outside this function and presumably
+ * may jump to the out/err labels -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_remove_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_remove_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_remove_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_remove_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_remove_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __fop_write_42_verify --
+ * Log verification callback for a __fop_write_42 record.
+ *
+ * Decodes dbtp->data with __fop_write_42_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_write_42_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_write_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_write_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_write_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_write_42_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __fop_write_verify --
+ * Log verification callback for a __fop_write record.
+ *
+ * Decodes dbtp->data with __fop_write_read and runs LOG_VRFY_PROC with
+ * INVAL_DBREGID as the file id, followed by the argument-less
+ * ON_PAGE_UPDATE4 macro. Both macros are defined outside this
+ * function; LOG_VRFY_PROC presumably may jump to the out/err labels --
+ * confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_write_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_write_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_write_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_write_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_write_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __fop_rename_42_verify --
+ * Log verification callback for a __fop_rename_42 record.
+ *
+ * Decodes dbtp->data with __fop_rename_42_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_rename_42_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_rename_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_rename_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_rename_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_rename_42_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __fop_rename_verify --
+ * Log verification callback for a __fop_rename record.
+ *
+ * Decodes dbtp->data with __fop_rename_read and runs LOG_VRFY_PROC
+ * with INVAL_DBREGID (macro defined outside this function; presumably
+ * it may jump to the out/err labels -- confirm in its definition).
+ *
+ * When DB_LOG_VERIFY_FORWARD is set on lvh, the record is also used to
+ * maintain the file-id to file-name map:
+ * - If __get_filereg_info returns 0 for argp->fileid, an entry already
+ * exists; the returned copy is released with __free_filereg_info
+ * and nothing is stored.
+ * - Otherwise a "dirname/newname" path is formatted into a buffer of
+ * dirname.size + newname.size + 2 bytes and stored together with
+ * the file id through __put_filereg_info.
+ * NOTE(review): every nonzero return from __get_filereg_info takes the
+ * store path, not only a "not found" result -- confirm this is
+ * intended.
+ * NOTE(review): buf is freed during cleanup right after being stored,
+ * so __put_filereg_info presumably copies the name -- confirm.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_rename_read fails;
+ * otherwise frees buf (if allocated) and argp and returns the final
+ * value of ret.
+ *
+ * PUBLIC: int __fop_rename_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_rename_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_rename_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ char *buf;
+ int ret;
+ size_t buflen;
+ VRFY_FILEREG_INFO freg, *fregp;
+
+ memset(&freg, 0, sizeof(freg));
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+ buf = NULL;
+
+ if ((ret = __fop_rename_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+ if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) {
+ /*
+ * Since we get the fname-fuid map when iterating from end to
+ * beginning, we only store the latest file name, that's the
+ * name supposed to be used currently. So if the fileid is
+ * already stored, and we see it again here, it means the db
+ * file was renamed and we already have its latest name.
+ *
+ * Store the dbfile path (dir/fname) in case there are db
+ * files with same name in different data directories.
+ */
+ if (__get_filereg_info(lvh, &(argp->fileid), &fregp) == 0) {
+ if (fregp != NULL &&
+ (ret = __free_filereg_info(fregp)) != 0)
+ goto err;
+ goto out;
+ }
+ freg.fileid = argp->fileid;
+ if ((ret = __os_malloc(env, buflen = argp->dirname.size +
+ argp->newname.size + 2, &buf)) != 0)
+ goto err;
+ snprintf(buf, buflen, "%s/%s", (char *)argp->dirname.data,
+ (char *)argp->newname.data);
+ freg.fname = buf;
+ /* Store the dbfilename<-->dbfileid map. */
+ if ((ret = __put_filereg_info(lvh, &freg)) != 0)
+ goto err;
+ }
+out:
+
+err:
+ if (buf != NULL)
+ __os_free(lvh->dbenv->env, buf);
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __fop_file_remove_verify --
+ * Log verification callback for a __fop_file_remove record.
+ *
+ * Decodes dbtp->data with __fop_file_remove_read and runs
+ * LOG_VRFY_PROC with INVAL_DBREGID as the file id; no further
+ * per-record checks are made. LOG_VRFY_PROC is defined outside this
+ * function and presumably may jump to the out/err labels -- confirm in
+ * its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __fop_file_remove_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __fop_file_remove_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__fop_file_remove_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __fop_file_remove_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __fop_file_remove_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+#ifdef HAVE_HASH
+/*
+ * __ham_insdel_verify --
+ * Log verification callback for a __ham_insdel record.
+ *
+ * Decodes dbtp->data with __ham_insdel_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_ham_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_insdel_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_insdel_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_insdel_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_insdel_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_insdel_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __ham_newpage_verify --
+ * Log verification callback for a __ham_newpage record.
+ *
+ * Decodes dbtp->data with __ham_newpage_read, runs LOG_VRFY_PROC and
+ * the argument-less ON_PAGE_UPDATE4 macro, then calls __lv_on_ham_log
+ * on argp->fileid. Both macros are defined outside this function;
+ * LOG_VRFY_PROC presumably may jump to the out/err labels -- confirm
+ * in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_newpage_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_newpage_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_newpage_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_newpage_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_newpage_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __ham_splitdata_verify --
+ * Log verification callback for a __ham_splitdata record.
+ *
+ * Decodes dbtp->data with __ham_splitdata_read, then runs
+ * LOG_VRFY_PROC and ON_PAGE_UPDATE (for argp->pgno) before calling
+ * __lv_on_ham_log on argp->fileid. Both macros are defined outside
+ * this function and presumably may jump to the out/err labels --
+ * confirm in their definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_splitdata_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_splitdata_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_splitdata_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_splitdata_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_splitdata_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __ham_replace_verify --
+ * Log verification callback for a __ham_replace record.
+ *
+ * Decodes dbtp->data with __ham_replace_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_ham_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_replace_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_replace_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_replace_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_replace_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_replace_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __ham_copypage_verify --
+ * Log verification callback for a __ham_copypage record.
+ *
+ * Decodes dbtp->data with __ham_copypage_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_ham_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_copypage_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_copypage_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_copypage_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_copypage_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_copypage_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __ham_metagroup_42_verify --
+ * Log verification callback for a __ham_metagroup_42 record.
+ *
+ * Decodes dbtp->data with __ham_metagroup_42_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_metagroup_42_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_metagroup_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_metagroup_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_metagroup_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_metagroup_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __ham_metagroup_verify --
+ * Log verification callback for a __ham_metagroup record.
+ *
+ * Decodes dbtp->data with __ham_metagroup_read, then runs
+ * LOG_VRFY_PROC and ON_PAGE_UPDATE (for argp->pgno) before calling
+ * __lv_on_ham_log on argp->fileid. Both macros are defined outside
+ * this function and presumably may jump to the out/err labels --
+ * confirm in their definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_metagroup_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_metagroup_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_metagroup_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_metagroup_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_metagroup_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __ham_groupalloc_42_verify --
+ * Log verification callback for a __ham_groupalloc_42 record.
+ *
+ * Decodes dbtp->data with __ham_groupalloc_42_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_groupalloc_42_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_groupalloc_42_verify __P((ENV *, DBT *,
+ * PUBLIC: DB_LSN *, db_recops, void *));
+ */
+int
+__ham_groupalloc_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_groupalloc_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_groupalloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __ham_groupalloc_verify --
+ * Log verification callback for a __ham_groupalloc record.
+ *
+ * Decodes dbtp->data with __ham_groupalloc_read and runs LOG_VRFY_PROC
+ * and the argument-less ON_PAGE_UPDATE4 macro (both defined outside
+ * this function; LOG_VRFY_PROC presumably may jump to the out/err
+ * labels -- confirm in its definition).
+ *
+ * It then fetches the file-life entry for argp->fileid with
+ * __get_filelife and checks that its meta_pgno equals PGNO_BASE_MD.
+ * On a mismatch it reports message 2546, sets ret to
+ * DB_LOG_VERIFY_BAD and invokes ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ * what ON_ERROR does with ret or control flow is not visible here.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_groupalloc_read fails;
+ * otherwise frees pflife (if one was obtained) and argp and returns
+ * the final value of ret.
+ *
+ * PUBLIC: int __ham_groupalloc_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_groupalloc_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_groupalloc_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ VRFY_FILELIFE *pflife;
+ int ret;
+
+ ret = 0;
+ pflife = NULL;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_groupalloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+
+ /*
+ * The __ham_groupalloc record is only generated when creating the
+ * hash sub database so it will always be on the master database's
+ * fileid.
+ */
+
+ if ((ret = __get_filelife(lvh, argp->fileid, &pflife)) != 0)
+ goto err;
+
+ if (pflife->meta_pgno != PGNO_BASE_MD) {
+ __db_errx(lvh->dbenv->env, DB_STR_A("2546",
+ "[%lu][%lu] __ham_groupalloc should apply only to the "
+ "master database with meta page number 0, current meta "
+ "page number is %d.", "%lu %lu %d"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ pflife->meta_pgno);
+ ret = DB_LOG_VERIFY_BAD;
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+
+out:
+
+err:
+ if (pflife != NULL)
+ __os_free(lvh->dbenv->env, pflife);
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __ham_changeslot_verify --
+ * Log verification callback for a __ham_changeslot record.
+ *
+ * Decodes dbtp->data with __ham_changeslot_read, runs LOG_VRFY_PROC
+ * and the argument-less ON_PAGE_UPDATE4 macro, then calls
+ * __lv_on_ham_log on argp->fileid. Both macros are defined outside
+ * this function; LOG_VRFY_PROC presumably may jump to the out/err
+ * labels -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_changeslot_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_changeslot_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_changeslot_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_changeslot_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_changeslot_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __ham_contract_verify --
+ * Log verification callback for a __ham_contract record.
+ *
+ * Decodes dbtp->data with __ham_contract_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_ham_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_contract_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_contract_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_contract_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_contract_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_contract_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __ham_curadj_verify --
+ * Log verification callback for a __ham_curadj record.
+ *
+ * Decodes dbtp->data with __ham_curadj_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_ham_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_curadj_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_curadj_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_curadj_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_curadj_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_curadj_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __ham_chgpg_verify --
+ * Log verification callback for a __ham_chgpg record.
+ *
+ * Decodes dbtp->data with __ham_chgpg_read, runs LOG_VRFY_PROC and the
+ * argument-less ON_PAGE_UPDATE4 macro, then calls __lv_on_ham_log on
+ * argp->fileid. Both macros are defined outside this function;
+ * LOG_VRFY_PROC presumably may jump to the out/err labels -- confirm
+ * in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __ham_chgpg_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __ham_chgpg_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__ham_chgpg_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __ham_chgpg_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __ham_chgpg_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE4 /* No pages are locked by txns. */
+ if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+#endif
+
+#ifdef HAVE_HEAP
+/*
+ * __heap_addrem_verify --
+ * Log verification callback for a __heap_addrem record.
+ *
+ * Decodes dbtp->data with __heap_addrem_read, then runs LOG_VRFY_PROC
+ * and ON_PAGE_UPDATE (for argp->pgno) before calling __lv_on_heap_log
+ * on argp->fileid. Both macros are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __heap_addrem_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __heap_addrem_verify
+ * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_addrem_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __heap_addrem_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __heap_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+out:
+
+err:
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __heap_pg_alloc_verify --
+ * Log verification callback for a __heap_pg_alloc record.
+ *
+ * Decodes dbtp->data with __heap_pg_alloc_read, then runs
+ * LOG_VRFY_PROC and ON_PAGE_UPDATE (for argp->pgno) before calling
+ * __lv_on_heap_log on argp->fileid. Both macros are defined outside
+ * this function and presumably may jump to the out/err labels --
+ * confirm in their definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __heap_pg_alloc_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __heap_pg_alloc_verify
+ * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __heap_pg_alloc_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __heap_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+out:
+
+err:
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __heap_trunc_meta_verify --
+ * Log verification callback for a __heap_trunc_meta record.
+ *
+ * Decodes dbtp->data with __heap_trunc_meta_read, then runs
+ * LOG_VRFY_PROC and ON_PAGE_UPDATE (for argp->pgno) before calling
+ * __lv_on_heap_log on argp->fileid. Both macros are defined outside
+ * this function and presumably may jump to the out/err labels --
+ * confirm in their definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __heap_trunc_meta_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __heap_trunc_meta_verify
+ * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_trunc_meta_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __heap_trunc_meta_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __heap_trunc_meta_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+out:
+
+err:
+
+ __os_free(env, argp);
+ return (ret);
+}
+
+/*
+ * __heap_trunc_page_verify --
+ * Log verification callback for a __heap_trunc_page record.
+ *
+ * Decodes dbtp->data with __heap_trunc_page_read, then runs
+ * LOG_VRFY_PROC and ON_PAGE_UPDATE (for argp->pgno) before calling
+ * __lv_on_heap_log on argp->fileid. Both macros are defined outside
+ * this function and presumably may jump to the out/err labels --
+ * confirm in their definitions.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __heap_trunc_page_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __heap_trunc_page_verify
+ * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__heap_trunc_page_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __heap_trunc_page_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __heap_trunc_page_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno);
+ if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+out:
+
+err:
+ __os_free(env, argp);
+ return (ret);
+}
+#endif
+
+#ifdef HAVE_QUEUE
+/*
+ * __qam_incfirst_verify --
+ * Log verification callback for a __qam_incfirst record.
+ *
+ * Decodes dbtp->data with __qam_incfirst_read, runs LOG_VRFY_PROC and
+ * then calls __lv_on_qam_log on argp->fileid. LOG_VRFY_PROC is defined
+ * outside this function and presumably may jump to the out/err labels
+ * -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __qam_incfirst_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __qam_incfirst_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__qam_incfirst_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __qam_incfirst_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __qam_incfirst_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __qam_mvptr_verify --
+ * Log verification callback for a __qam_mvptr record.
+ *
+ * Decodes dbtp->data with __qam_mvptr_read, runs LOG_VRFY_PROC and
+ * then calls __lv_on_qam_log on argp->fileid. LOG_VRFY_PROC is defined
+ * outside this function and presumably may jump to the out/err labels
+ * -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __qam_mvptr_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __qam_mvptr_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__qam_mvptr_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __qam_mvptr_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __qam_mvptr_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __qam_del_verify --
+ * Log verification callback for a __qam_del record.
+ *
+ * Decodes dbtp->data with __qam_del_read, runs LOG_VRFY_PROC and then
+ * calls __lv_on_qam_log on argp->fileid. LOG_VRFY_PROC is defined
+ * outside this function and presumably may jump to the out/err labels
+ * -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __qam_del_read fails; otherwise
+ * frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __qam_del_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__qam_del_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __qam_del_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __qam_del_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __qam_add_verify --
+ * Log verification callback for a __qam_add record.
+ *
+ * Decodes dbtp->data with __qam_add_read, runs LOG_VRFY_PROC and then
+ * calls __lv_on_qam_log on argp->fileid. LOG_VRFY_PROC is defined
+ * outside this function and presumably may jump to the out/err labels
+ * -- confirm in its definition.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __qam_add_read fails; otherwise
+ * frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __qam_add_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__qam_add_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __qam_add_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __qam_add_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid);
+ if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __qam_delext_verify --
+ * Log verification callback for a __qam_delext record.
+ *
+ * Decodes dbtp->data with __qam_delext_read, runs LOG_VRFY_PROC and
+ * then calls __lv_on_qam_log on argp->fileid. LOG_VRFY_PROC is defined
+ * outside this function and presumably may jump to the out/err labels
+ * -- confirm in its definition.
+ * NOTE(review): LOG_VRFY_PROC is given INVAL_DBREGID here even though
+ * the following __lv_on_qam_log call uses argp->fileid -- confirm the
+ * difference is intentional.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __qam_delext_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __qam_delext_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__qam_delext_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __qam_delext_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret =
+ __qam_delext_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+ if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0)
+ goto err;
+
+out:
+
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+#endif
+
+/*
+ * __txn_regop_42_verify --
+ * Log verification callback for a __txn_regop_42 record.
+ *
+ * Decodes dbtp->data with __txn_regop_42_read and then invokes
+ * ON_NOT_SUPPORTED with the record type. That macro is defined outside
+ * this function; presumably it reports the record as unsupported and
+ * may jump to the err label -- confirm in its definition. The regular
+ * LOG_VRFY_PROC step is deliberately left commented out.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __txn_regop_42_read fails;
+ * otherwise frees argp and returns the final value of ret.
+ *
+ * PUBLIC: int __txn_regop_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_regop_42_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __txn_regop_42_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret;
+
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __txn_regop_42_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+ /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */
+err:
+
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __txn_regop_verify --
+ * Log verification callback for a __txn_regop (commit) record.
+ *
+ * Decodes dbtp->data with __txn_regop_read and then takes one of two
+ * paths:
+ *
+ * Forward scan (DB_LOG_VERIFY_FORWARD set on lvh): calls
+ * __lv_log_fwdscr_oncmt for the txn id and timestamp, stores the
+ * record's lsn/timestamp/type with __put_timestamp_info, and finishes.
+ *
+ * Otherwise: runs LOG_VRFY_PROC, drops the txn's page entries with
+ * __del_txn_pages, checks the timestamp with __lv_on_timestamp and
+ * looks the txn up with __get_txn_vrfy_info. DB_NOTFOUND is tolerated
+ * from both __del_txn_pages and __get_txn_vrfy_info.
+ * - Txn not found, not a partial verification: if a non-zero
+ * start_lsn is configured and __txn_started reports the txn as
+ * started, ret is cleared and the function finishes; otherwise
+ * message 2547 is reported and ON_ERROR(lvh, DB_LOG_VERIFY_INTERR)
+ * is invoked.
+ * - Txn not found, partial verification: DB_NOTFOUND is cleared and
+ * the function finishes.
+ * - Txn found: the prepared/active counter is decremented and the
+ * commit counter incremented (only when ptxnid is 0), the status
+ * becomes TXN_STAT_COMMIT, last_lsn is set to *lsnp and the entry
+ * is written back with __put_txn_vrfy_info. With
+ * DB_LOG_VERIFY_VERBOSE the child-txn counts are printed.
+ *
+ * LOG_VRFY_PROC and ON_ERROR are defined outside this function and
+ * presumably may jump to the out/err labels -- confirm in their
+ * definitions.
+ * NOTE(review): pptvi is only ever set to NULL in this function, so
+ * its cleanup branch appears to be dead code.
+ *
+ * The incoming notused2 value is ignored (it is overwritten), and lvhp
+ * is used as a DB_LOG_VRFY_INFO *.
+ *
+ * Returns the decode error directly if __txn_regop_read fails;
+ * otherwise frees ptvi (if set) and argp and returns ret, where a
+ * __free_txninfo failure is reported only if ret was 0.
+ *
+ * PUBLIC: int __txn_regop_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_regop_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __txn_regop_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ int ret, ret2, started;
+ VRFY_TXN_INFO *ptvi, *pptvi;
+ VRFY_TIMESTAMP_INFO tsinfo;
+
+ ptvi = pptvi = NULL;
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+ ret = ret2 = started = 0;
+
+ if ((ret = __txn_regop_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * The __lv_log_fwdscr_oncmt call must precede LOG_VRFY_PROC otherwise
+ * this txn will be taken as an aborted txn.
+ */
+ if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) {
+ if ((ret = __lv_log_fwdscr_oncmt(lvh, *lsnp,
+ argp->txnp->txnid, 0, argp->timestamp)) != 0)
+ goto err;
+
+ tsinfo.lsn = *lsnp;
+ tsinfo.timestamp = argp->timestamp;
+ tsinfo.logtype = argp->type;
+ if ((ret = __put_timestamp_info(lvh, &tsinfo)) != 0)
+ goto err;
+ goto out; /* We are done. */
+ }
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+ if ((ret = __del_txn_pages(lvh, argp->txnp->txnid)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;/* Some txns may have updated no pages. */
+ if ((ret = __lv_on_timestamp(lvh, lsnp, argp->timestamp,
+ DB___txn_regop)) != 0)
+ goto err;
+ if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+ if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+ if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) &&
+ (ret2 = __txn_started(lvh, lvh->lv_config->start_lsn,
+ argp->txnp->txnid, &started)) == 0 && started != 0) {
+ ret = 0;
+ goto err;
+ }
+ if (ret2 != 0)
+ ret = ret2;
+ __db_errx(lvh->dbenv->env, DB_STR_A("2547",
+ "[%lu][%lu] Can not find an active transaction's "
+ "information, txnid: %lx.", "%lu %lu %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ (u_long)argp->txnp->txnid);
+ ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+
+ }
+
+ if (ptvi == NULL) {
+ if (ret == DB_NOTFOUND &&
+ F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL))
+ ret = 0;
+ goto out;
+
+ }
+ DB_ASSERT(env, ptvi->ptxnid == 0);
+
+ /*
+ * This log record is only logged when committing an outermost txn,
+ * child txn commits are logged in __txn_child_log.
+ */
+ if (ptvi->ptxnid == 0) {
+ if (ptvi->status == TXN_STAT_PREPARE)
+ lvh->ntxn_prep--;
+ else if (ptvi->status == TXN_STAT_ACTIVE)
+ lvh->ntxn_active--;
+ lvh->ntxn_commit++;
+ }
+ ptvi->status = TXN_STAT_COMMIT;
+ DB_ASSERT(env, IS_ZERO_LSN(ptvi->last_lsn));
+ ptvi->last_lsn = *lsnp;
+ if ((ret = __put_txn_vrfy_info(lvh, ptvi)) != 0)
+ goto err;
+
+ /* Report txn stats. */
+ if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE))
+ __db_msg(env, DB_STR_A("2548",
+ "[%lu][%lu] The number of active, committed and aborted "
+ "child txns of txn %lx: %u, %u, %u.",
+ "%lu %lu %lx %u %u %u"), (u_long)lsnp->file,
+ (u_long)lsnp->offset, (u_long)ptvi->txnid,
+ ptvi->nchild_active, ptvi->nchild_commit,
+ ptvi->nchild_abort);
+out:
+err:
+
+ if (pptvi != NULL && (ret2 = __free_txninfo(pptvi)) != 0 && ret == 0)
+ ret = ret2;
+ if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0)
+ ret = ret2;
+ __os_free(env, argp);
+
+ return (ret);
+}
+
+/*
+ * __txn_ckp_42_verify --
+ *	Log verification handler for __txn_ckp_42 records.  The record is
+ *	decoded and passed to ON_NOT_SUPPORTED; no other checks are made.
+ *
+ * PUBLIC: int __txn_ckp_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_ckp_42_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	DB_LOG_VRFY_INFO *lvh;
+	__txn_ckp_42_args *argp;
+	int ret;
+
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+	/* The recovery op is not consulted; assign it so it counts as used. */
+	notused2 = DB_TXN_LOG_VERIFY;
+
+	ret = __txn_ckp_42_read(env, dbtp->data, &argp);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * NOTE(review): ON_NOT_SUPPORTED is defined elsewhere; it presumably
+	 * sets "ret" and jumps to "err", which has no other user here.
+	 */
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+err:
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * __txn_ckp_verify --
+ *	Log verification handler for checkpoint (__txn_ckp) records.
+ *
+ *	When DB_LOG_VERIFY_FORWARD is set, only the record's timestamp is
+ *	stored and lvh->valid_lsn is updated.  Otherwise the record is checked
+ *	against the last checkpoint seen (last_ckp link and timestamp order)
+ *	and against every known transaction, and is then stored as the most
+ *	recent checkpoint.
+ *
+ *	Returns 0 on success, or the error of the step that failed.
+ *
+ *	NOTE(review): LOG_VRFY_PROC and ON_ERROR are macros defined elsewhere;
+ *	they presumably assign "ret" and jump to the "out"/"err" labels.
+ *
+ * PUBLIC: int __txn_ckp_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_ckp_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	__txn_ckp_args *argp;
+	DB_LOG_VRFY_INFO *lvh;
+	VRFY_CKP_INFO *lastckp, ckpinfo;
+	int ret;
+	struct __ckp_verify_params cvp;
+	VRFY_TIMESTAMP_INFO tsinfo;
+	char timebuf[CTIME_BUFLEN], lasttimebuf[CTIME_BUFLEN];
+	time_t ckp_time, lastckp_time;
+
+	lastckp = NULL;
+	notused2 = DB_TXN_LOG_VERIFY;
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+	memset(&ckpinfo, 0, sizeof(ckpinfo));
+	memset(&cvp, 0, sizeof(cvp));
+
+	if ((ret = __txn_ckp_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+
+	LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+	if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) {
+		tsinfo.lsn = *lsnp;
+		tsinfo.timestamp = argp->timestamp;
+		tsinfo.logtype = argp->type;
+		/*
+		 * Store the first ckp_lsn, or the least one greater than the
+		 * starting point.  There will be no partial txns after
+		 * valid_lsn.
+		 */
+		if (!(!IS_ZERO_LSN(lvh->lv_config->start_lsn) &&
+		    LOG_COMPARE(&(lvh->lv_config->start_lsn),
+		    &(argp->ckp_lsn)) > 0))
+			lvh->valid_lsn = argp->ckp_lsn;
+		if ((ret = __put_timestamp_info(lvh, &tsinfo)) != 0)
+			goto err;
+		goto out;/* We are done, exit. */
+	}
+	lvh->nckp++;
+	ckp_time = (time_t)argp->timestamp;
+	__db_msg(env, DB_STR_A("2549",
+	    "[%lu][%lu] Checkpoint record, ckp_lsn: [%lu][%lu], "
+	    "timestamp: %s. Total checkpoint: %u",
+	    "%lu %lu %lu %lu %s %u"), (u_long)lsnp->file,
+	    (u_long)lsnp->offset, (u_long)argp->ckp_lsn.file,
+	    (u_long)argp->ckp_lsn.offset,
+	    __os_ctime(&ckp_time, timebuf), lvh->nckp);
+
+	if ((ret = __lv_on_timestamp(lvh, lsnp,
+	    argp->timestamp, DB___txn_ckp)) != 0)
+		goto err;
+	/*
+	 * Leave through the cleanup code on failure: returning directly here
+	 * would leak the decoded record in argp.
+	 */
+	if (((ret = __get_last_ckp_info(lvh, &lastckp)) != 0) &&
+	    ret != DB_NOTFOUND)
+		goto err;
+	/* No earlier checkpoint is known, so there is nothing to compare. */
+	if (ret == DB_NOTFOUND)
+		goto cont;
+
+	if (LOG_COMPARE(&(argp->last_ckp), &(lastckp->lsn)) != 0) {
+		__db_errx(env, DB_STR_A("2550",
+		    "[%lu][%lu] Last known checkpoint [%lu][%lu] not equal "
+		    "to last_ckp :[%lu][%lu]. Some checkpoint log records "
+		    "may be missing.", "%lu %lu %lu %lu %lu %lu"),
+		    (u_long)lsnp->file, (u_long)lsnp->offset,
+		    (u_long)lastckp->lsn.file, (u_long)lastckp->lsn.offset,
+		    (u_long)argp->last_ckp.file, (u_long)argp->last_ckp.offset);
+		ret = DB_LOG_VERIFY_BAD;
+		ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+	}
+
+	/*
+	 * Checkpoints are generally not performed very often, so we treat
+	 * this as an error, whereas for txn commits it is only a warning.
+	 *
+	 * Each formatted time needs its own buffer: both strings are consumed
+	 * by the same __db_errx call, so sharing one buffer would print the
+	 * same time twice.
+	 *
+	 * NOTE(review): the message text says "smaller" although the test
+	 * fires when the last checkpoint's timestamp is the larger one; the
+	 * catalogued text is left unchanged here.
+	 */
+	lastckp_time = (time_t)lastckp->timestamp;
+	if (argp->timestamp < lastckp->timestamp) {
+		__db_errx(env, DB_STR_A("2551",
+		    "[%lu][%lu] Last known checkpoint [%lu, %lu] has a "
+		    "timestamp %s smaller than this checkpoint timestamp %s.",
+		    "%lu %lu %lu %lu %s %s"), (u_long)lsnp->file,
+		    (u_long)lsnp->offset, (u_long)lastckp->lsn.file,
+		    (u_long)lastckp->lsn.offset,
+		    __os_ctime(&lastckp_time, lasttimebuf),
+		    __os_ctime(&ckp_time, timebuf));
+		ret = DB_LOG_VERIFY_BAD;
+		ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+	}
+
+cont:
+	cvp.env = env;
+	cvp.lsn = *lsnp;
+	cvp.ckp_lsn = argp->ckp_lsn;
+
+	/*
+	 * Verify that all active txn's first lsn is greater than
+	 * argp->ckp_lsn.
+	 */
+	if ((ret = __iterate_txninfo(lvh, 0, 0,
+	    __lv_ckp_vrfy_handler, &cvp)) != 0)
+		ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+
+	/* Remember this checkpoint as the most recent one. */
+	ckpinfo.timestamp = argp->timestamp;
+	ckpinfo.lsn = *lsnp;
+	ckpinfo.ckplsn = argp->ckp_lsn;
+
+	if ((ret = __put_ckp_info(lvh, &ckpinfo)) != 0)
+		goto err;
+out:
+err:
+	if (argp != NULL)
+		__os_free(env, argp);
+	if (lastckp != NULL)
+		__os_free(env, lastckp);
+	return (ret);
+}
+
+/*
+ * __lv_ckp_vrfy_handler --
+ *	Per-transaction callback that __txn_ckp_verify hands to
+ *	__iterate_txninfo.  "param" is a struct __ckp_verify_params holding
+ *	the checkpoint record's LSN and its ckp_lsn.
+ *
+ *	Reports an error if an active transaction began at or before ckp_lsn.
+ *	Returns DB_LOG_VERIFY_BAD in that case unless DB_LOG_VERIFY_CAF is
+ *	set, otherwise 0.
+ */
+static int
+__lv_ckp_vrfy_handler(lvinfo, txninfop, param)
+	DB_LOG_VRFY_INFO *lvinfo;
+	VRFY_TXN_INFO *txninfop;
+	void *param;
+{
+	struct __ckp_verify_params *cvp;
+
+	cvp = (struct __ckp_verify_params *)param;
+
+	/* Only active transactions are constrained by the checkpoint LSN. */
+	if (txninfop->status != TXN_STAT_ACTIVE)
+		return (0);
+
+	/* ckp_lsn should be less than any active txn's first lsn. */
+	if (LOG_COMPARE(&(cvp->ckp_lsn), &(txninfop->first_lsn)) < 0)
+		return (0);
+
+	__db_errx(cvp->env, DB_STR_A("2552",
+	    "[%lu][%lu] ckp log's ckp_lsn [%lu][%lu] greater than "
+	    "active txn %lx 's first lsn [%lu][%lu]",
+	    "%lu %lu %lu %lu %lx %lu %lu"),
+	    (u_long)cvp->lsn.file, (u_long)cvp->lsn.offset,
+	    (u_long)cvp->ckp_lsn.file, (u_long)cvp->ckp_lsn.offset,
+	    (u_long)txninfop->txnid,
+	    (u_long)txninfop->first_lsn.file,
+	    (u_long)txninfop->first_lsn.offset);
+	lvinfo->flags |= DB_LOG_VERIFY_ERR;
+
+	/* A non-zero return is what stops the iteration. */
+	return (F_ISSET(lvinfo, DB_LOG_VERIFY_CAF) ? 0 : DB_LOG_VERIFY_BAD);
+}
+
+/*
+ * __txn_child_verify --
+ * Log verification handler for __txn_child records, which tie a
+ * committed child transaction (argp->child) to its parent
+ * (argp->txnp->txnid).
+ *
+ * When DB_LOG_VERIFY_FORWARD is set the record is only handed to
+ * __lv_log_fwdscr_oncmt and __lv_log_fwdscr_onrec. Otherwise the child's
+ * pages are passed to __return_txn_pages, the parent's nchild_commit is
+ * incremented and the child is marked TXN_STAT_COMMIT.
+ *
+ * NOTE(review): LOG_VRFY_PROC and ON_ERROR are macros defined elsewhere;
+ * they presumably assign "ret" and jump to the "out"/"err" labels --
+ * confirm against their definitions.
+ *
+ * PUBLIC: int __txn_child_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_child_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __txn_child_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ VRFY_TXN_INFO *ptvi, *ptvi2;
+ int ret, ret2, started;
+
+ /*
+ * This function is called when a txn T0's child txn T1 commits. Before
+ * this log record we don't know T0 and T1's relationship. This means
+ * we never know the T0 has an active child txn T1, all child txns
+ * we know are committed.
+ */
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+ ptvi = ptvi2 = NULL;
+ ret = ret2 = started = 0;
+
+ if ((ret = __txn_child_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ /*
+ * The __lv_log_fwdscr_oncmt call must precede LOG_VRFY_PROC otherwise
+ * this txn will be taken as an aborted txn.
+ */
+ if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) {
+ if ((ret = __lv_log_fwdscr_oncmt(lvh, argp->c_lsn, argp->child,
+ argp->txnp->txnid, 0)) != 0)
+ goto err;
+ if ((ret = __lv_log_fwdscr_onrec(lvh, argp->txnp->txnid,
+ argp->type, argp->prev_lsn, *lsnp)) != 0)
+ goto err;
+ goto out;/* We are done. */
+ }
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+ if ((ret = __return_txn_pages(lvh, argp->child,
+ argp->txnp->txnid)) != 0 && ret != DB_NOTFOUND)
+ goto err;/* Some txns may have updated no pages. */
+
+ /* Update parent txn info. */
+ if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+ if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+ /*
+ * A parent that began before a non-zero start_lsn is expected
+ * to be unknown; leave quietly with success in that case.
+ */
+ if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) &&
+ ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn,
+ argp->txnp->txnid, &started)) == 0) && started != 0) {
+ ret = 0;
+ goto err;
+ }
+ if (ret2 != 0)
+ ret = ret2;
+ __db_errx(lvh->dbenv->env, DB_STR_A("2553",
+ "[%lu][%lu] Can not find an active transaction's "
+ "information, txnid: %lx.", "%lu %lu %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ (u_long)argp->txnp->txnid);
+ ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+
+ }
+ /* Without the parent's info nothing can be updated. */
+ if (ptvi == NULL) {
+ if (ret == DB_NOTFOUND &&
+ F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL))
+ ret = 0;
+ goto out;
+
+ }
+ ptvi->nchild_commit++;
+ /*
+ * The start of this child txn caused lvh->ntxn_active to be
+ * incremented unnecessarily, so decrement it.
+ */
+ lvh->ntxn_active--;
+ if (ptvi->status != TXN_STAT_ACTIVE) {
+ __db_errx(lvh->dbenv->env, DB_STR_A("2554",
+ "[%lu][%lu] Parent txn %lx ended "
+ "before child txn %lx ends.", "%lu %lu %lx %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ (u_long)argp->txnp->txnid, (u_long)argp->child);
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+ if ((ret = __put_txn_vrfy_info(lvh, ptvi)) != 0)
+ goto err;
+
+ /* Update child txn info. */
+ if ((ret = __get_txn_vrfy_info(lvh, argp->child, &ptvi2)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+ if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+ /* Same start_lsn allowance as for the parent above. */
+ if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) &&
+ ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn,
+ argp->child, &started)) == 0) && started != 0) {
+ ret = 0;
+ goto err;
+ }
+ if (ret2 != 0)
+ ret = ret2;
+ __db_errx(lvh->dbenv->env, DB_STR_A("2555",
+ "[%lu][%lu] Can not find an active "
+ "transaction's information, txnid: %lx.",
+ "%lu %lu %lx"), (u_long)lsnp->file,
+ (u_long)lsnp->offset, (u_long)argp->child);
+ ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+
+ }
+ if (ptvi2 == NULL) {
+ if (ret == DB_NOTFOUND &&
+ F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL))
+ ret = 0;
+ goto out;
+
+ }
+ if (ptvi2->status != TXN_STAT_ACTIVE) {
+ __db_errx(lvh->dbenv->env, DB_STR_A("2556",
+ "[%lu][%lu] Txn %lx ended before it commits.",
+ "%lu %lu %lx"), (u_long)lsnp->file,
+ (u_long)lsnp->offset, (u_long)argp->child);
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+ ptvi2->status = TXN_STAT_COMMIT;
+ if ((ret = __put_txn_vrfy_info(lvh, ptvi2)) != 0)
+ goto err;
+out:
+err:
+ /* Shared cleanup: a free failure is reported only if ret is still 0. */
+ __os_free(env, argp);
+ if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0)
+ ret = ret2;
+ if (ptvi2 != NULL && (ret2 = __free_txninfo(ptvi2)) != 0 && ret == 0)
+ ret = ret2;
+
+ return (ret);
+}
+
+/*
+ * __txn_xa_regop_42_verify --
+ *	Log verification handler for __txn_xa_regop_42 records.  The record is
+ *	decoded and passed to ON_NOT_SUPPORTED; no other checks are made.
+ *
+ * PUBLIC: int __txn_xa_regop_42_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_xa_regop_42_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	DB_LOG_VRFY_INFO *lvh;
+	__txn_xa_regop_42_args *argp;
+	int ret;
+
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+	/* The recovery op is not consulted; assign it so it counts as used. */
+	notused2 = DB_TXN_LOG_VERIFY;
+
+	ret = __txn_xa_regop_42_read(env, dbtp->data, &argp);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * NOTE(review): ON_NOT_SUPPORTED is defined elsewhere; it presumably
+	 * sets "ret" and jumps to "err", which has no other user here.
+	 */
+	ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type);
+err:
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * __txn_prepare_verify --
+ * Log verification handler for __txn_prepare records. Looks up the
+ * preparing transaction, moves it from the active to the prepared count,
+ * records the prepare LSN and status, and reports a transaction that is
+ * prepared more than once.
+ *
+ * NOTE(review): LOG_VRFY_PROC and ON_ERROR are macros defined elsewhere;
+ * they presumably assign "ret" and jump to the "out"/"err" labels --
+ * confirm against their definitions.
+ *
+ * PUBLIC: int __txn_prepare_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_prepare_verify(env, dbtp, lsnp, notused2, lvhp)
+ ENV *env;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ db_recops notused2;
+ void *lvhp;
+{
+ __txn_prepare_args *argp;
+ DB_LOG_VRFY_INFO *lvh;
+ VRFY_TXN_INFO *ptvi;
+ int ret, ret2, started;
+
+ ret = ret2 = started = 0;
+ ptvi = NULL;
+ notused2 = DB_TXN_LOG_VERIFY;
+ lvh = (DB_LOG_VRFY_INFO *)lvhp;
+
+ if ((ret = __txn_prepare_read(env, dbtp->data, &argp)) != 0)
+ return (ret);
+
+ LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+ if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+
+ if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+ /*
+ * A txn that began before a non-zero start_lsn is expected to
+ * be unknown; leave quietly with success in that case.
+ */
+ if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) &&
+ ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn,
+ argp->txnp->txnid, &started)) == 0) && started != 0) {
+ ret = 0;
+ goto err;
+ }
+ if (ret2 != 0)
+ ret = ret2;
+ __db_errx(lvh->dbenv->env, DB_STR_A("2557",
+ "[%lu][%lu] Can not find an active transaction's "
+ "information, txnid: %lx.", "%lu %lu %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ (u_long)argp->txnp->txnid);
+ ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+
+ }
+ /* Without the txn's info nothing can be updated. */
+ if (ptvi == NULL) {
+ if (ret == DB_NOTFOUND &&
+ F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL))
+ ret = 0;
+ goto out;
+
+ }
+ /* prep_lsn must be set exactly when the status is TXN_STAT_PREPARE. */
+ DB_ASSERT(env,
+ (IS_ZERO_LSN(ptvi->prep_lsn) && ptvi->status != TXN_STAT_PREPARE) ||
+ (!IS_ZERO_LSN(ptvi->prep_lsn) && ptvi->status == TXN_STAT_PREPARE));
+
+ /*
+ * NOTE(review): the counters are adjusted before the duplicate-prepare
+ * check below, so a repeated prepare is counted again, and that branch
+ * only prints a message without setting an error -- confirm intended.
+ */
+ lvh->ntxn_prep++;
+ lvh->ntxn_active--;
+
+ if (!IS_ZERO_LSN(ptvi->prep_lsn)) {/* Prepared more than once. */
+
+ __db_errx(lvh->dbenv->env, DB_STR_A("2558",
+ "[%lu][%lu] Multiple txn_prepare log record for "
+ "transaction %lx, previous prepare lsn: [%lu, %lu].",
+ "%lu %lu %lx %lu %lu"), (u_long)lsnp->file,
+ (u_long)lsnp->offset, (u_long)argp->txnp->txnid,
+ (u_long)ptvi->prep_lsn.file, (u_long)ptvi->prep_lsn.offset);
+ } else {
+ ptvi->prep_lsn = *lsnp;
+ ptvi->status = TXN_STAT_PREPARE;
+ }
+ ret = __put_txn_vrfy_info(lvh, ptvi);
+out:
+err:
+ /* Shared cleanup: a free failure is reported only if ret is still 0. */
+ __os_free(env, argp);
+ if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0)
+ ret = ret2;
+ return (ret);
+}
+
+/*
+ * __txn_recycle_verify --
+ *	Log verification handler for __txn_recycle records: registers the
+ *	record's LSN as a recycle point for the txn id range it carries.
+ *
+ * PUBLIC: int __txn_recycle_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__txn_recycle_verify(env, dbtp, lsnp, notused2, lvhp)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops notused2;
+	void *lvhp;
+{
+	DB_LOG_VRFY_INFO *lvh;
+	__txn_recycle_args *argp;
+	int ret;
+
+	lvh = (DB_LOG_VRFY_INFO *)lvhp;
+	/* The recovery op is not consulted; assign it so it counts as used. */
+	notused2 = DB_TXN_LOG_VERIFY;
+
+	ret = __txn_recycle_read(env, dbtp->data, &argp);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * NOTE(review): LOG_VRFY_PROC is defined elsewhere; it presumably
+	 * sets "ret" and is the only user of the "out" and "err" labels.
+	 */
+	LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID);
+
+	/* Add recycle info for all txns whose ID is in the [min, max] range. */
+	ret = __add_recycle_lsn_range(lvh, lsnp, argp->min, argp->max);
+
+out:
+err:
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * __lv_on_timestamp --
+ *	Handle log types having timestamps, so far only __txn_ckp and
+ *	__txn_regop.  Fetches the latest timestamp info for "lsn" and, in
+ *	verbose mode, warns when this record's timestamp is not greater than
+ *	it.  A missing earlier timestamp (DB_NOTFOUND) is not an error.
+ */
+static int
+__lv_on_timestamp(lvh, lsn, timestamp, logtype)
+	DB_LOG_VRFY_INFO *lvh;
+	const DB_LSN *lsn;
+	int32_t timestamp;
+	u_int32_t logtype;
+{
+	VRFY_TIMESTAMP_INFO *ltsinfo;
+	int ret;
+
+	ltsinfo = NULL;
+	ret = __get_latest_timestamp_info(lvh, *lsn, &ltsinfo);
+
+	if (ret == 0) {
+		DB_ASSERT(lvh->dbenv->env, ltsinfo != NULL);
+		if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE) &&
+		    ltsinfo->timestamp >= timestamp) {
+			__db_errx(lvh->dbenv->env, DB_STR_A("2559",
+			    "[%lu][%lu] [WARNING] This log record of type %s "
+			    "does not have a greater time stamp than "
+			    "[%lu, %lu] of type %s", "%lu %lu %s %lu %lu %s"),
+			    (u_long)lsn->file, (u_long)lsn->offset,
+			    LOGTYPE_NAME(lvh, logtype),
+			    (u_long)ltsinfo->lsn.file,
+			    (u_long)ltsinfo->lsn.offset,
+			    LOGTYPE_NAME(lvh, ltsinfo->logtype));
+			lvh->flags |= DB_LOG_VERIFY_WARNING;
+		}
+	}
+
+	if (ltsinfo != NULL)
+		__os_free(lvh->dbenv->env, ltsinfo);
+
+	/* Having no earlier timestamp to compare against is fine. */
+	return (ret == DB_NOTFOUND ? 0 : ret);
+}
+
+/*
+ * __lv_on_txn_logrec --
+ * Called whenever the log record belongs to a transaction.
+ *
+ * Counts the record by type, resolves dbregid to a file id when one is
+ * given, registers a new transaction if prev_lsnp is the zero LSN, and
+ * then checks the record against the transaction's stored state:
+ * prev_lsnp must equal the stored cur_lsn, and a prepared transaction may
+ * only log a __txn_regop record. Finally cur_lsn, the flags and the
+ * updated-file list are stored back.
+ *
+ * NOTE(review): ON_ERROR is a macro defined elsewhere; it presumably
+ * assigns "ret" and jumps to "err" -- confirm against its definition.
+ */
+static int
+__lv_on_txn_logrec(lvh, lsnp, prev_lsnp, txnp, type, dbregid)
+ DB_LOG_VRFY_INFO *lvh;
+ const DB_LSN *lsnp;
+ const DB_LSN *prev_lsnp;
+ const DB_TXN *txnp;
+ u_int32_t type;
+ int32_t dbregid;
+{
+ DBT fid;
+ VRFY_TXN_INFO *pvti;
+ u_int32_t txnid;
+ VRFY_FILEREG_INFO *fregp;
+ int ret, ret2, started;
+
+ ret = ret2 = started = 0;
+ pvti = NULL;
+ fregp = NULL;
+ lvh->lrtypes[type]++;/* Increment per-type log record count. */
+ txnid = txnp->txnid;
+ memset(&fid, 0, sizeof(fid));
+
+ /* Records that touch no file skip the file-registration lookup. */
+ if (dbregid == INVAL_DBREGID)
+ goto cont;
+ if ((ret = __get_filereg_by_dbregid(lvh, dbregid, &fregp)) != 0) {
+ if (ret == DB_NOTFOUND) {
+ /*
+ * It's likely that we are verifying a subset of logs
+ * and the DBREG_OPEN is outside the range.
+ */
+ if (!F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL))
+ __db_msg(lvh->dbenv->env, DB_STR_A("2560",
+ "[%lu][%lu] Transaction %lx is updating a "
+ "db file %d not registered.",
+ "%lu %lu %lx %d"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ (u_long)txnp->txnid, dbregid);
+ goto cont;
+ } else
+ goto err;
+ }
+
+ /* Struct copy of the DBT; fregp itself is released in the cleanup. */
+ fid = fregp->fileid;
+cont:
+ /* A zero prev_lsn marks the first record of a transaction. */
+ if (IS_ZERO_LSN(*prev_lsnp) &&
+ (ret = __lv_on_new_txn(lvh, lsnp, txnp, type, dbregid, &fid)) != 0)
+ goto err;
+
+ if ((ret = __get_txn_vrfy_info(lvh, txnid, &pvti)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+
+ /* If can't find the txn, there is an internal error. */
+ if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) {
+ /*
+ * If verifying from middle, it's expected that txns begun
+ * before start are not found.
+ */
+ if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && ((ret2 =
+ __txn_started(lvh, lvh->lv_config->start_lsn, txnid,
+ &started)) == 0) && started != 0) {
+ ret = 0;
+ goto out;/* We are done. */
+ }
+ if (ret2 != 0)
+ ret = ret2;
+
+ __db_errx(lvh->dbenv->env, DB_STR_A("2561",
+ "[%lu][%lu] Can not find an active transaction's "
+ "information, txnid: %lx.", "%lu %lu %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)txnid);
+ ON_ERROR(lvh, DB_LOG_VERIFY_INTERR);
+ }
+
+ /* Can't proceed without the txn info. */
+ if (pvti == NULL) {
+ if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL))
+ ret = 0;
+ goto out;
+ }
+
+ /* Check if prev lsn is wrong, and some log records may be missing. */
+ if (!IS_ZERO_LSN(*prev_lsnp) &&
+ LOG_COMPARE(prev_lsnp, &(pvti->cur_lsn)) != 0) {
+ __db_errx(lvh->dbenv->env, DB_STR_A("2562",
+ "[%lu][%lu] Previous record for transaction %lx is "
+ "[%lu][%lu] and prev_lsn is [%lu][%lu].",
+ "%lu %lu %lx %lu %lu %lu %lu"), (u_long)lsnp->file,
+ (u_long)lsnp->offset, (u_long)pvti->txnid,
+ (u_long)pvti->cur_lsn.file, (u_long)pvti->cur_lsn.offset,
+ (u_long)prev_lsnp->file, (u_long)prev_lsnp->offset);
+ ret = DB_LOG_VERIFY_BAD;
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+
+ /*
+ * After the txn is prepared, the only valid log record for this txn
+ * is the commit record.
+ */
+ if (pvti->status == TXN_STAT_PREPARE && type != DB___txn_regop) {
+ __db_errx(lvh->dbenv->env, DB_STR_A("2563",
+ "[%lu][%lu] Update action is performed in a "
+ "prepared transaction %lx.", "%lu %lu %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)txnid);
+ ret = DB_LOG_VERIFY_BAD;
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+ /* Store this record as the transaction's most recent one. */
+ pvti->cur_lsn = *lsnp;
+ pvti->flags = txnp->flags;
+ if (dbregid != INVAL_DBREGID && fid.size > 0 &&
+ (ret = __add_file_updated(pvti, &fid, dbregid)) != 0)
+ goto err;
+ if ((ret = __put_txn_vrfy_info(lvh, pvti)) != 0)
+ goto err;
+out:
+err:
+ /* Shared cleanup: a free failure is reported only if ret is still 0. */
+ if (pvti != NULL && (ret2 = __free_txninfo(pvti)) != 0 && ret == 0)
+ ret = ret2;
+ if (fregp != NULL &&
+ (ret2 = __free_filereg_info(fregp)) != 0 && ret == 0)
+ ret = ret2;
+ return (ret);
+}
+
+/*
+ * __lv_on_new_txn --
+ * Called whenever a new transaction is started, including child
+ * transactions.
+ *
+ * Increments lvh->ntxn_active and creates the transaction's verification
+ * record. If a record already exists for the txn id, it is reset when the
+ * id has a recycle LSN earlier than lsnp; otherwise, unless the record
+ * already has child counts, the reuse is reported as an error.
+ *
+ * NOTE(review): ON_ERROR is a macro defined elsewhere; it presumably
+ * assigns "ret" and jumps to "err" -- confirm against its definition.
+ */
+static int
+__lv_on_new_txn (lvh, lsnp, txnp, type, dbregid, fid)
+ DB_LOG_VRFY_INFO *lvh;
+ const DB_LSN *lsnp;
+ const DB_TXN *txnp;
+ u_int32_t type;
+ int32_t dbregid;
+ const DBT *fid;
+{
+ VRFY_TXN_INFO vti, *pvti, *vtip;
+ int ret, tret;
+ u_int32_t txnid;
+ ENV *env;
+
+ ret = tret = 0;
+ txnid = txnp->txnid;
+ pvti = NULL;
+ memset(&vti, 0, sizeof(vti));
+ vti.txnid = txnid;
+ env = lvh->dbenv->env;
+ /* Log record type, may be used later. Pass lint checks. */
+ COMPQUIET(type, 0);
+
+ /*
+ * It's possible that the new txn is a child txn, we will decrement
+ * this value in __txn_child_verify when we realize this, because
+ * this value only records the number of outermost active txns.
+ */
+ lvh->ntxn_active++;
+
+ if ((ret = __get_txn_vrfy_info(lvh, txnid, &pvti)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+ /* A first-time txn id uses the zeroed stack record vti. */
+ if (ret == DB_NOTFOUND)
+ vtip = &vti;
+ else {/* The txnid is reused, may be illegal. */
+ vtip = pvti;
+ /*
+ * If this txn id was recycled, this use is legal. A legal
+ * recyclable txnid is immediately not recyclable after
+ * it's recycled here. And it's impossible for vtip->status
+ * to be TXN_STAT_ACTIVE, since we have made it TXN_STAT_ABORT
+ * when we detected this txn id recycle just now.
+ */
+ if (vtip->num_recycle > 0 && LOG_COMPARE(&(vtip->recycle_lsns
+ [vtip->num_recycle - 1]), lsnp) < 0) {
+ DB_ASSERT(env, vtip->status != TXN_STAT_ACTIVE);
+ if ((ret = __rem_last_recycle_lsn(vtip)) != 0)
+ goto err;
+ if ((ret = __clear_fileups(vtip)) != 0)
+ goto err;
+
+ /* Reset the per-incarnation state of the record. */
+ vtip->status = 0;
+ ZERO_LSN(vtip->prep_lsn);
+ ZERO_LSN(vtip->last_lsn);
+
+ vtip->nchild_active = 0;
+ vtip->nchild_commit = 0;
+ vtip->nchild_abort = 0;
+ /*
+ * We may goto the else branch if this txn has child txns
+ * before any updates done on its behalf. So we should
+ * exclude this possibility to conclude a failed verification.
+ */
+ } else if (vtip->nchild_active + vtip->nchild_commit +
+ vtip->nchild_abort == 0) {
+ __db_errx(lvh->dbenv->env, DB_STR_A("2564",
+ "[%lu][%lu] Transaction id %lx reused without "
+ "being recycled with a __txn_recycle.",
+ "%lu %lu %lx"),
+ (u_long)lsnp->file, (u_long)lsnp->offset,
+ (u_long)txnid);
+ ret = DB_LOG_VERIFY_BAD;
+ ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+ }
+ }
+
+ /* This record is both the first and the current one of the txn. */
+ vtip->first_lsn = *lsnp;
+ vtip->cur_lsn = *lsnp;
+ vtip->flags = txnp->flags;
+
+ /*
+ * It's possible that the first log rec does not update any file,
+ * like the __txn_child type of record.
+ */
+ if (fid->size > 0 && (ret =
+ __add_file_updated(vtip, fid, dbregid)) != 0)
+ goto err;
+ if ((ret = __put_txn_vrfy_info(lvh, vtip)) != 0)
+ goto err;
+
+err:
+ /*
+ * The fetched record (if any) and the stack record vti are both
+ * released here; a free failure is reported only if ret is still 0.
+ */
+ if (pvti != NULL && (tret = __free_txninfo(pvti)) != 0 && ret == 0)
+ ret = tret;
+ if ((tret = __free_txninfo_stack(&vti)) != 0 && ret == 0)
+ ret = tret;
+
+ return (ret);
+}
+
+/*
+ * __lv_new_logfile_vrfy --
+ *	Called when we detect that a new log file is used.  Checks the
+ *	transition from lvh->last_lsn to lsnp and then records lsnp as the
+ *	last LSN verified.
+ *
+ *	NOTE(review): ON_ERROR is a macro defined elsewhere; it presumably
+ *	assigns "ret" and jumps to "err", which has no other user here.
+ */
+static int
+__lv_new_logfile_vrfy(lvh, lsnp)
+	DB_LOG_VRFY_INFO *lvh;
+	const DB_LSN *lsnp;
+{
+	int ret;
+
+	ret = 0;
+
+	/* Only a change of log file number needs checking. */
+	if (!IS_ZERO_LSN(lvh->last_lsn) && lvh->last_lsn.file != lsnp->file) {
+		/*
+		 * If the file number changed, it must have grown by exactly
+		 * one, and the record must sit at the offset given by
+		 * __lv_first_offset.
+		 */
+		if (lsnp->file - lvh->last_lsn.file != 1 ||
+		    lsnp->offset != __lv_first_offset(lvh->dbenv->env)) {
+			__db_errx(lvh->dbenv->env,
+			    "[%lu][%lu] Last log record verified ([%lu][%lu]) is not "
+			    "immidiately before the current log record.",
+			    (u_long)lsnp->file, (u_long)lsnp->offset,
+			    (u_long)lvh->last_lsn.file,
+			    (u_long)lvh->last_lsn.offset);
+			ret = DB_LOG_VERIFY_BAD;
+			ON_ERROR(lvh, DB_LOG_VERIFY_ERR);
+		}
+	}
+
+	lvh->last_lsn = *lsnp;
+err:
+	return (ret);
+}
+
+/*
+ * __lv_first_offset --
+ *	Return the log record header size (HDR_CRYPTO_SZ when CRYPTO_ON(env),
+ *	HDR_NORMAL_SZ otherwise) plus sizeof(LOGP).  __lv_new_logfile_vrfy
+ *	compares this against the offset of the first record it sees in a new
+ *	log file.
+ */
+static u_int32_t
+__lv_first_offset(env)
+	ENV *env;
+{
+	u_int32_t sz;
+
+	sz = CRYPTO_ON(env) ? HDR_CRYPTO_SZ : HDR_NORMAL_SZ;
+	sz += sizeof(LOGP);
+
+	return (sz);
+}
+
+/*
+ * __lv_on_nontxn_update --
+ *	Called when we see a non-transactional update log record.  Counts the
+ *	record by type and, when it names a file, also counts and reports it
+ *	as a non-transactional update.  Always returns 0.
+ */
+static int
+__lv_on_nontxn_update(lvh, lsnp, txnid, logtype, fileid)
+	DB_LOG_VRFY_INFO *lvh;
+	const DB_LSN *lsnp;
+	u_int32_t txnid, logtype;
+	int32_t fileid;
+{
+	/* The txn id is not used here. */
+	COMPQUIET(txnid, 0);
+
+	lvh->lrtypes[logtype]++;
+
+	/* Records that name no file are only counted by type. */
+	if (fileid == INVAL_DBREGID)
+		return (0);
+
+	lvh->non_txnup_cnt++;
+	__db_msg(lvh->dbenv->env, DB_STR_A("2565",
+	    "[%lu][%lu] Non-transactional update, "
+	    "log type: %u, fileid: %d.", "%lu %lu %u %d"),
+	    (u_long)lsnp->file, (u_long)lsnp->offset, logtype, fileid);
+
+	return (0);
+}
+
+/*
+ * __lv_on_txn_aborted --
+ *	Finish verification bookkeeping for the transaction recorded in
+ *	lvinfo->aborted_txnid / lvinfo->aborted_txnlsn: drop its page
+ *	information, mark it TXN_STAT_ABORT, adjust the abort/active counters
+ *	and clear the pending-abort fields.
+ *
+ *	Returns 0 on success, or the error of the step that failed.  The
+ *	fetched transaction record is released on every path, including a
+ *	failed __put_txn_vrfy_info.
+ *
+ *	NOTE(review): ON_ERROR is a macro defined elsewhere; it presumably
+ *	assigns "ret" and jumps to "err" -- confirm against its definition.
+ */
+static int
+__lv_on_txn_aborted(lvinfo)
+	DB_LOG_VRFY_INFO *lvinfo;
+{
+	int ret, ret2, sres;
+	VRFY_TXN_INFO *ptvi;
+	u_int32_t abtid;
+	DB_LSN lsn, slsn;
+
+	ret = ret2 = sres = 0;
+	abtid = lvinfo->aborted_txnid;
+	lsn = lvinfo->aborted_txnlsn;
+	slsn = lvinfo->lv_config->start_lsn;
+	ptvi = NULL;
+
+	if ((ret = __del_txn_pages(lvinfo, abtid)) != 0 &&
+	    ret != DB_NOTFOUND)
+		goto err;/* Some txns may have updated no pages. */
+	ret = __get_txn_vrfy_info(lvinfo, abtid, &ptvi);
+	if (ret == DB_NOTFOUND && !F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) {
+		/*
+		 * If verifying from slsn and the txn abtid started before
+		 * slsn, it's expected that we can't find the txn.
+		 */
+		if (!IS_ZERO_LSN(slsn) && (ret2 = __txn_started(lvinfo, slsn,
+		    abtid, &sres)) == 0 && sres != 0) {
+			ret = 0;
+			goto err;
+		}
+		if (ret2 != 0)
+			ret = ret2;/* Use the same error msg below. */
+		__db_errx(lvinfo->dbenv->env, DB_STR_A("2566",
+		    "[%lu][%lu] Can not find an active transaction's "
+		    "information, txnid: %lx.", "%lu %lu %lx"),
+		    (u_long)lsn.file, (u_long)lsn.offset,
+		    (u_long)abtid);
+		ON_ERROR(lvinfo, DB_LOG_VERIFY_INTERR);
+	}
+	/* Without the txn's info nothing can be updated. */
+	if (ptvi == NULL) {
+		if (ret == DB_NOTFOUND &&
+		    F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL))
+			ret = 0;
+		goto out;
+	}
+	ptvi->status = TXN_STAT_ABORT;
+	lvinfo->ntxn_abort++;
+	lvinfo->ntxn_active--;
+	/* Report txn stats. */
+	if (F_ISSET(lvinfo, DB_LOG_VERIFY_VERBOSE)) {
+		__db_msg(lvinfo->dbenv->env, DB_STR_A("2567",
+		    "[%lu][%lu] Txn %lx aborted after this log record.",
+		    "%lu %lu %lx"), (u_long)lvinfo->aborted_txnlsn.file,
+		    (u_long)lvinfo->aborted_txnlsn.offset, (u_long)ptvi->txnid);
+		__db_msg(lvinfo->dbenv->env, DB_STR_A("2568",
+		    "\tThe number of active, committed and aborted child txns "
+		    "of txn %lx: %u, %u, %u.", "%lx %u %u %u"),
+		    (u_long)ptvi->txnid, ptvi->nchild_active,
+		    ptvi->nchild_commit, ptvi->nchild_abort);
+	}
+	/* The pending abort has been handled; clear it. */
+	lvinfo->aborted_txnid = 0;
+	lvinfo->aborted_txnlsn.file = lvinfo->aborted_txnlsn.offset = 0;
+	ret = __put_txn_vrfy_info(lvinfo, ptvi);
+out:
+err:
+	/*
+	 * Release the fetched record on every path; freeing it only after a
+	 * successful put would leak it when the put fails.  A free failure is
+	 * reported only if ret is still 0.
+	 */
+	if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0)
+		ret = ret2;
+	return (ret);
+}
diff --git a/src/log/log_verify_stub.c b/src/log/log_verify_stub.c
new file mode 100644
index 00000000..e6589a50
--- /dev/null
+++ b/src/log/log_verify_stub.c
@@ -0,0 +1,79 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef HAVE_VERIFY
+
+#include "db_config.h"
+#include "db_int.h"
+
+static int __db_log_novrfy __P((ENV *));
+int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *));
+int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *));
+int __log_verify_wrap __P((ENV *env, const char *, u_int32_t, const char *,
+ const char *, time_t, time_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t,
+ int, int));
+
+/*
+ * __db_log_novrfy --
+ * Error when a Berkeley DB build doesn't include log verification
+ * support: reports the condition on env and returns DB_OPNOTSUP.
+ */
+static int
+__db_log_novrfy(env)
+ ENV *env;
+{
+ __db_errx(env, DB_STR("2523",
+ "library build did not include support for log verification"));
+ return (DB_OPNOTSUP);
+}
+
+/*
+ * __log_verify_pp --
+ *	Stub used when HAVE_VERIFY is not defined: ignores the configuration
+ *	and returns the result of __db_log_novrfy.
+ */
+int
+__log_verify_pp(dbenv, lvconfig)
+	DB_ENV *dbenv;
+	const DB_LOG_VERIFY_CONFIG *lvconfig;
+{
+	int ret;
+
+	COMPQUIET(lvconfig, NULL);
+
+	/* The dbenv is intact, callers should properly take care of it. */
+	ret = __db_log_novrfy(dbenv->env);
+	return (ret);
+}
+
+/*
+ * __log_verify --
+ *	Stub used when HAVE_VERIFY is not defined: ignores the configuration
+ *	and returns the result of __db_log_novrfy.
+ */
+int
+__log_verify(dbenv, lvconfig)
+	DB_ENV *dbenv;
+	const DB_LOG_VERIFY_CONFIG *lvconfig;
+{
+	int ret;
+
+	COMPQUIET(lvconfig, NULL);
+
+	ret = __db_log_novrfy(dbenv->env);
+	return (ret);
+}
+
+/*
+ * __log_verify_wrap --
+ *	Stub used when HAVE_VERIFY is not defined: every argument except env
+ *	is ignored and the result of __db_log_novrfy is returned.
+ */
+int
+__log_verify_wrap(env, envhome, cachesize, dbfile, dbname,
+    stime, etime, stfile, stoffset, efile, eoffset, caf, verbose)
+	ENV *env;
+	const char *envhome, *dbfile, *dbname;
+	time_t stime, etime;
+	u_int32_t cachesize, stfile, stoffset, efile, eoffset;
+	int caf, verbose;
+{
+	/* Quiet unused-argument warnings, in parameter order. */
+	COMPQUIET(envhome, NULL);
+	COMPQUIET(cachesize, 0);
+	COMPQUIET(dbfile, NULL);
+	COMPQUIET(dbname, NULL);
+	COMPQUIET(stime, 0);
+	COMPQUIET(etime, 0);
+	COMPQUIET(stfile, 0);
+	COMPQUIET(stoffset, 0);
+	COMPQUIET(efile, 0);
+	COMPQUIET(eoffset, 0);
+	COMPQUIET(caf, 0);
+	COMPQUIET(verbose, 0);
+
+	return (__db_log_novrfy(env));
+}
+
+#endif /* !HAVE_VERIFY */
diff --git a/src/log/log_verify_util.c b/src/log/log_verify_util.c
new file mode 100644
index 00000000..88682921
--- /dev/null
+++ b/src/log/log_verify_util.c
@@ -0,0 +1,2234 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+/*
+ * This file contains helper functions like data structure and in-memory db
+ * management, which are used to store various log verification information.
+ */
+#include "db_config.h"
+#include "db_int.h"
+
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/btree.h"
+#include "dbinc/hash.h"
+#include "dbinc/qam.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+#include "dbinc/fop.h"
+
+#include "dbinc/log_verify.h"
+
+/*
+ * Error-handling wrappers around Berkeley DB calls.  All three store the
+ * result of op in a local variable named "ret", which the enclosing
+ * function must declare, and call __lv_on_bdbop_err on a non-zero result.
+ *
+ * BDBOP --
+ *	On failure jump to the enclosing function's "err" label.
+ */
+#define	BDBOP(op) do {							\
+	ret = (op);							\
+	if (ret != 0) {							\
+		__lv_on_bdbop_err(ret);					\
+		goto err;						\
+	}								\
+} while (0)
+
+/*
+ * BDBOP2 --
+ *	On failure report the error with the function name funct and
+ *	return ret directly from the enclosing function (no cleanup label
+ *	is visited).
+ */
+#define	BDBOP2(dbenv, op, funct) do {					\
+	ret = (op);							\
+	if (ret != 0) {							\
+		__lv_on_bdbop_err(ret);					\
+		__db_err((dbenv)->env, ret, "\n%s", (funct));		\
+		return (ret);						\
+	}								\
+} while (0)
+
+/*
+ * BDBOP3 --
+ *	Like BDBOP2, except that the result excpt is tolerated: it is left
+ *	in ret for the caller to examine and execution continues.
+ */
+#define	BDBOP3(dbenv, op, excpt, funct) do {				\
+	ret = (op);							\
+	if (ret != 0) {							\
+		__lv_on_bdbop_err(ret);					\
+		if (ret != (excpt)) {					\
+			__db_err((dbenv)->env, ret, "\n%s", (funct));	\
+			return (ret);					\
+		}							\
+	}								\
+} while (0)
+
+typedef int (*btcmp_funct)(DB *, const DBT *, const DBT *);
+typedef int (*dupcmp_funct)(DB *, const DBT *, const DBT *);
+
+static int __lv_add_recycle_handler __P((
+ DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, void *));
+static int __lv_add_recycle_lsn __P((VRFY_TXN_INFO *, const DB_LSN *));
+static size_t __lv_dbt_arrsz __P((const DBT *, u_int32_t));
+static int __lv_fidpgno_cmp __P((DB *, const DBT *, const DBT *));
+static int __lv_i32_cmp __P((DB *, const DBT *, const DBT *));
+static int __lv_lsn_cmp __P((DB *, const DBT *, const DBT *));
+static void __lv_on_bdbop_err __P((int));
+static int __lv_open_db __P((DB_ENV *, DB **, DB_THREAD_INFO *,
+ const char *, int, btcmp_funct, u_int32_t, dupcmp_funct));
+static int __lv_pack_filereg __P((const VRFY_FILEREG_INFO *, DBT *));
+static int __lv_pack_txn_vrfy_info __P((
+ const VRFY_TXN_INFO *, DBT *, DBT *data));
+static int __lv_seccbk_fname __P((DB *, const DBT *, const DBT *, DBT *));
+static int __lv_seccbk_lsn __P((DB *, const DBT *, const DBT *, DBT *));
+static int __lv_seccbk_txnpg __P((DB *, const DBT *, const DBT *, DBT *));
+static void __lv_setup_logtype_names __P((DB_LOG_VRFY_INFO *lvinfo));
+static int __lv_txnrgns_lsn_cmp __P((DB *, const DBT *, const DBT *));
+static int __lv_ui32_cmp __P((DB *, const DBT *, const DBT *));
+static int __lv_unpack_txn_vrfy_info __P((VRFY_TXN_INFO **, const DBT *));
+static int __lv_unpack_filereg __P((const DBT *, VRFY_FILEREG_INFO **));
+
+/*
+ * __lv_on_bdbop_err --
+ *	Hook invoked by the BDBOP* macros whenever a wrapped call fails.
+ *	It does nothing; it exists so a debugger breakpoint can be placed
+ *	on every failing operation.
+ */
+static void
+__lv_on_bdbop_err(ret)
+	int ret;
+{
+	/* Reference the argument so lint does not flag it as unused. */
+	COMPQUIET(ret, 0);
+}
+
+/*
+ * __create_log_vrfy_info --
+ *	Initialize and return a log verification handle to be used throughout
+ *	a verification process.
+ *
+ *	A private environment is created to hold the verification databases.
+ *	When cfg->temp_envhome is NULL the environment is DB_PRIVATE and the
+ *	databases are in-memory; otherwise they are created as files under
+ *	that home directory.  A cfg->cachesize of 0 selects a 256MB cache.
+ *
+ *	On success *lvinfopp receives the new handle, which the caller
+ *	releases with __destroy_log_vrfy_info.  On failure everything that
+ *	was created is destroyed, *lvinfopp is not modified and the error
+ *	code is returned.
+ *
+ * PUBLIC: int __create_log_vrfy_info __P((const DB_LOG_VERIFY_CONFIG *,
+ * PUBLIC:     DB_LOG_VRFY_INFO **, DB_THREAD_INFO *));
+ */
+int
+__create_log_vrfy_info(cfg, lvinfopp, ip)
+	const DB_LOG_VERIFY_CONFIG *cfg;
+	DB_LOG_VRFY_INFO **lvinfopp;
+	DB_THREAD_INFO *ip;
+{
+	const char *envhome;
+	int inmem, ret;
+	u_int32_t cachesz, envflags;
+	const char *dbf1, *dbf2, *dbf3, *dbf4, *dbf5, *dbf6, *dbf7, *dbf8,
+	    *dbf9, *dbf10, *dbf11;
+	DB_LOG_VRFY_INFO *lvinfop;
+
+	dbf1 = "__db_log_vrfy_txninfo.db";
+	dbf2 = "__db_log_vrfy_fileregs.db";
+	dbf3 = "__db_log_vrfy_pgtxn.db";
+	dbf4 = "__db_log_vrfy_lsntime.db";
+	dbf5 = "__db_log_vrfy_timelsn.db";
+	dbf6 = "__db_log_vrfy_ckps.db";
+	dbf7 = "__db_log_vrfy_dbregids.db";
+	dbf8 = "__db_log_vrfy_fnameuid.db";
+	dbf9 = "__db_log_vrfy_timerange.db";
+	dbf10 = "__db_log_vrfy_txnaborts.db";
+	dbf11 = "__db_log_vrfy_txnpg.db";
+
+	envhome = cfg->temp_envhome;
+	lvinfop = NULL;
+	cachesz = cfg->cachesize;
+	if (cachesz == 0)
+		cachesz = 1024 * 1024 * 256;
+
+	BDBOP(__os_malloc(NULL, sizeof(DB_LOG_VRFY_INFO), &lvinfop));
+	memset(lvinfop, 0, sizeof(DB_LOG_VRFY_INFO));
+	lvinfop->ip = ip;
+	__lv_setup_logtype_names(lvinfop);
+	/* Avoid the VERIFY_PARTIAL bit being cleared if no ckp_lsn exists. */
+	lvinfop->valid_lsn.file = lvinfop->valid_lsn.offset = (u_int32_t)-1;
+
+	/*
+	 * The envhome parameter determines if we will use an in-memory
+	 * environment and databases.
+	 */
+	if (envhome == NULL) {
+		envflags = DB_PRIVATE;
+		inmem = 1;
+	} else {
+		envflags = 0;
+		inmem = 0;
+	}
+
+	/* Create log verify internal database environment. */
+	BDBOP(db_env_create(&lvinfop->dbenv, 0));
+	BDBOP(__memp_set_cachesize(lvinfop->dbenv, 0, cachesz, 1));
+	/*
+	 * Log verification internal db environment should be accessed
+	 * single-threaded.  No transaction semantics needed.
+	 */
+	BDBOP(__env_open(lvinfop->dbenv, envhome,
+	    envflags | DB_CREATE | DB_INIT_MPOOL, 0666));
+
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txninfo, ip, dbf1,
+	    inmem, __lv_ui32_cmp, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->fileregs, ip, dbf2,
+	    inmem, NULL, 0, NULL));
+
+	/* No dup allowed, always overwrite data with same key. */
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->dbregids, ip, dbf7,
+	    inmem, __lv_i32_cmp, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->pgtxn, ip, dbf3,
+	    inmem, __lv_fidpgno_cmp, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnpg, ip, dbf11,
+	    inmem, __lv_ui32_cmp, DB_DUP | DB_DUPSORT, __lv_fidpgno_cmp));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->lsntime, ip, dbf4,
+	    inmem, __lv_lsn_cmp, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->timelsn, ip, dbf5,
+	    inmem, __lv_i32_cmp, DB_DUP | DB_DUPSORT, __lv_lsn_cmp));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnaborts, ip, dbf10,
+	    inmem, __lv_lsn_cmp, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->ckps, ip, dbf6,
+	    inmem, __lv_lsn_cmp, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->fnameuid, ip, dbf8,
+	    inmem, NULL, 0, NULL));
+	BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnrngs, ip, dbf9,
+	    inmem, __lv_ui32_cmp, DB_DUP | DB_DUPSORT, __lv_txnrgns_lsn_cmp));
+
+	/* Hook up the secondary indices: timelsn, fnameuid and txnpg. */
+	BDBOP(__db_associate(lvinfop->lsntime, ip, NULL,
+	    lvinfop->timelsn, __lv_seccbk_lsn, DB_CREATE));
+	BDBOP(__db_associate(lvinfop->fileregs, ip, NULL,
+	    lvinfop->fnameuid, __lv_seccbk_fname, DB_CREATE));
+	BDBOP(__db_associate(lvinfop->pgtxn, ip, NULL,
+	    lvinfop->txnpg, __lv_seccbk_txnpg, DB_CREATE));
+
+	*lvinfopp = lvinfop;
+
+	return (0);
+err:
+	/*
+	 * lvinfop is still NULL when the very first allocation failed, so
+	 * it must be checked before looking at its dbenv member.
+	 */
+	if (lvinfop != NULL && lvinfop->dbenv != NULL && ret != 0)
+		__db_err(lvinfop->dbenv->env, ret, "__create_log_vrfy_info");
+	(void)__destroy_log_vrfy_info(lvinfop);
+
+	return (ret);
+}
+
+/*
+ * __destroy_log_vrfy_info --
+ * Destroy and free a log verification handle.
+ *
+ * PUBLIC: int __destroy_log_vrfy_info __P((DB_LOG_VRFY_INFO *));
+ */
+/*
+ * Close every database of the handle, then its environment, then free the
+ * handle itself.  A NULL handle is accepted and is a no-op.
+ *
+ * A failed close does not stop the teardown: a handle cannot be used again
+ * after a close call whatever it returned, so the remaining handles are
+ * still closed to avoid leaking them.  The first error seen is returned.
+ */
+int
+__destroy_log_vrfy_info(lvinfop)
+	DB_LOG_VRFY_INFO *lvinfop;
+{
+	int ret, tret;
+
+	ret = 0;
+	if (lvinfop == NULL)
+		return (0);
+
+	if (lvinfop->txnaborts != NULL &&
+	    (tret = __db_close(lvinfop->txnaborts, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->txninfo != NULL &&
+	    (tret = __db_close(lvinfop->txninfo, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->dbregids != NULL &&
+	    (tret = __db_close(lvinfop->dbregids, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->fileregs != NULL &&
+	    (tret = __db_close(lvinfop->fileregs, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->pgtxn != NULL &&
+	    (tret = __db_close(lvinfop->pgtxn, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->lsntime != NULL &&
+	    (tret = __db_close(lvinfop->lsntime, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->ckps != NULL &&
+	    (tret = __db_close(lvinfop->ckps, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->txnrngs != NULL &&
+	    (tret = __db_close(lvinfop->txnrngs, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->fnameuid != NULL &&
+	    (tret = __db_close(lvinfop->fnameuid, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->timelsn != NULL &&
+	    (tret = __db_close(lvinfop->timelsn, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+	if (lvinfop->txnpg != NULL &&
+	    (tret = __db_close(lvinfop->txnpg, NULL, 0)) != 0 && ret == 0)
+		ret = tret;
+
+	/* The environment goes last, after all of its databases. */
+	if (lvinfop->dbenv != NULL &&
+	    (tret = __env_close(lvinfop->dbenv, 0)) != 0 && ret == 0)
+		ret = tret;
+
+	__os_free(NULL, lvinfop);
+
+	return (ret);
+}
+
+/*
+ * Secondary index callback function for DB_LOG_VRFY_INFO->fnameuid.
+ *
+ * The secondary key is the file name (including its terminating NUL) taken
+ * from the packed VRFY_FILEREG_INFO in the primary data.  Records without a
+ * file name, or with an empty one, are not indexed (DB_DONOTINDEX).  The
+ * key buffer is allocated here and flagged DB_DBT_APPMALLOC.
+ */
+static int
+__lv_seccbk_fname(secdb, key, data, result)
+	DB *secdb;
+	const DBT *key;
+	const DBT *data;
+	DBT *result;
+{
+	int ret, tret;
+	VRFY_FILEREG_INFO *freg;
+	char *buf;
+	size_t buflen, slen;
+
+	ret = tret = 0;
+	/*
+	 * Must start out NULL: the cleanup code below tests it even when
+	 * unpacking failed and never stored anything into it.
+	 */
+	freg = NULL;
+	COMPQUIET(key, NULL);
+	if ((ret = __lv_unpack_filereg(data, &freg)) != 0)
+		goto out;
+	if (freg->fname == NULL || (slen = strlen(freg->fname)) == 0) {
+		ret = DB_DONOTINDEX;
+		goto out;
+	}
+
+	buflen = (slen + 1) * sizeof(char);
+	if ((ret = __os_umalloc(secdb->dbenv->env, buflen, &buf)) != 0)
+		goto out;
+	(void)strcpy(buf, freg->fname);
+	result->size = (u_int32_t)buflen;
+	result->flags |= DB_DBT_APPMALLOC;
+	result->data = buf;
+out:
+	/* The unpacked copy is only needed to extract the name. */
+	if (freg != NULL && (tret = __free_filereg_info(freg)) != 0 && ret == 0)
+		ret = tret;
+	return (ret);
+}
+
+/*
+ * Secondary index callback function for DB_LOG_VRFY_INFO->txnpg.
+ * The primary data DBT holds nothing but the txnid, so it is used as the
+ * secondary key as-is, without copying.
+ */
+static int
+__lv_seccbk_txnpg(secdb, key, data, result)
+	DB *secdb;
+	const DBT *key;
+	const DBT *data;
+	DBT *result;
+{
+	COMPQUIET(secdb, NULL);
+	COMPQUIET(key, NULL);
+
+	result->size = data->size;
+	result->data = data->data;
+
+	return (0);
+}
+
+/*
+ * Secondary index callback function for DB_LOG_VRFY_INFO->timelsn.
+ * The secondary key is the timestamp member of the VRFY_TIMESTAMP_INFO
+ * stored as primary data; the key points into that data, nothing is copied.
+ */
+static int
+__lv_seccbk_lsn(secdb, key, data, result)
+	DB *secdb;
+	const DBT *key;
+	const DBT *data;
+	DBT *result;
+{
+	VRFY_TIMESTAMP_INFO *tsinfo;
+
+	COMPQUIET(secdb, NULL);
+	COMPQUIET(key, NULL);
+
+	tsinfo = (VRFY_TIMESTAMP_INFO *)data->data;
+	result->size = sizeof(tsinfo->timestamp);
+	result->data = &(tsinfo->timestamp);
+
+	return (0);
+}
+
+/*
+ * __lv_open_db --
+ *	Create and open a btree database handle with a 16KB page size.
+ *	cmpf and dupcmpf, when non-NULL, become the key and duplicate
+ *	comparison functions; sflags, when non-zero, is passed to
+ *	__db_set_flags.  With inmem set, name is used as the database name
+ *	with no backing file; otherwise name is used as the file name.
+ *	On failure the partially built handle is closed.
+ */
+static int
+__lv_open_db(dbenv, dbpp, ip, name, inmem, cmpf, sflags, dupcmpf)
+	DB_ENV *dbenv;
+	DB **dbpp;
+	DB_THREAD_INFO *ip;
+	const char *name;
+	int inmem;
+	btcmp_funct cmpf;
+	u_int32_t sflags;
+	dupcmp_funct dupcmpf;
+{
+	DB *handle;
+	const char *file, *subdb;
+	int ret;
+
+	handle = NULL;
+	ret = 0;
+	file = inmem ? NULL : name;
+	subdb = inmem ? name : NULL;
+
+	BDBOP(db_create(&handle, dbenv, 0));
+
+	if (cmpf != NULL) {
+		BDBOP(__bam_set_bt_compare(handle, cmpf));
+	}
+	if (dupcmpf != NULL) {
+		handle->dup_compare = dupcmpf;
+	}
+	if (sflags != 0) {
+		BDBOP(__db_set_flags(handle, sflags));
+	}
+	/* No concurrency needed, a big page size reduces overflow pages. */
+	BDBOP(__db_set_pagesize(handle, 16 * 1024));
+
+	BDBOP(__db_open(handle, ip, NULL, file, subdb, DB_BTREE, DB_CREATE,
+	    0666, PGNO_BASE_MD));
+
+	*dbpp = handle;
+
+	return (0);
+err:
+	if (dbenv != NULL && ret != 0)
+		__db_err(dbenv->env, ret, "__lv_open_db");
+	if (handle != NULL)
+		(void)__db_close(handle, NULL, 0);
+
+	return (ret);
+}
+
+/*
+ * Btree compare function for a [fileid, pgno] key: order by the
+ * DB_FILE_ID_LEN bytes of file id first, then by the page number that
+ * follows them.
+ */
+static int
+__lv_fidpgno_cmp(db, dbt1, dbt2)
+	DB *db;
+	const DBT *dbt1;
+	const DBT *dbt2;
+{
+	db_pgno_t lhs, rhs;
+	int cmp;
+	size_t idlen;
+
+	COMPQUIET(db, NULL);
+
+	idlen = DB_FILE_ID_LEN;
+	if ((cmp = memcmp(dbt1->data, dbt2->data, idlen)) != 0)
+		return (cmp);
+
+	/* Same file: copy the page numbers out, they may be unaligned. */
+	memcpy(&lhs, (u_int8_t *)dbt1->data + idlen, sizeof(lhs));
+	memcpy(&rhs, (u_int8_t *)dbt2->data + idlen, sizeof(rhs));
+	cmp = NUMCMP(lhs, rhs);
+
+	return (cmp);
+}
+
+/* Btree compare function for keys holding a single int32_t value. */
+static int
+__lv_i32_cmp(db, dbt1, dbt2)
+	DB *db;
+	const DBT *dbt1;
+	const DBT *dbt2;
+{
+	int32_t lhs, rhs;
+
+	COMPQUIET(db, NULL);
+
+	/* Copy the values out rather than dereferencing the key memory. */
+	memcpy(&lhs, dbt1->data, sizeof(lhs));
+	memcpy(&rhs, dbt2->data, sizeof(rhs));
+
+	return (NUMCMP(lhs, rhs));
+}
+
+/* Btree compare function for keys holding a single u_int32_t value. */
+static int
+__lv_ui32_cmp(db, dbt1, dbt2)
+	DB *db;
+	const DBT *dbt1;
+	const DBT *dbt2;
+{
+	u_int32_t lhs, rhs;
+
+	COMPQUIET(db, NULL);
+
+	/* Copy the values out rather than dereferencing the key memory. */
+	memcpy(&lhs, dbt1->data, sizeof(lhs));
+	memcpy(&rhs, dbt2->data, sizeof(rhs));
+
+	return (NUMCMP(lhs, rhs));
+}
+
+/*
+ * Btree compare function for keys holding a DB_LSN; ordering is delegated
+ * to LOG_COMPARE.  Both keys are asserted to be exactly sizeof(DB_LSN).
+ */
+static int
+__lv_lsn_cmp(db, dbt1, dbt2)
+	DB *db;
+	const DBT *dbt1;
+	const DBT *dbt2;
+{
+	DB_LSN lhs, rhs;
+
+	DB_ASSERT(db->env, dbt1->size == sizeof(DB_LSN));
+	DB_ASSERT(db->env, dbt2->size == sizeof(DB_LSN));
+
+	memcpy(&lhs, dbt1->data, sizeof(DB_LSN));
+	memcpy(&rhs, dbt2->data, sizeof(DB_LSN));
+
+	return (LOG_COMPARE(&lhs, &rhs));
+}
+
+/*
+ * Structure management routines. We keep each structure on a
+ * consecutive memory chunk.
+ *
+ * The get functions will allocate memory via __os_malloc, and callers
+ * should free the memory after use. The update functions for VRFY_TXN_INFO
+ * and VRFY_FILEREG_INFO may realloc the structure.
+ */
+
+/*
+ * PUBLIC: int __put_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC: const VRFY_TXN_INFO *));
+ */
+/*
+ * Pack txninfop and store it in the txninfo database under its txnid,
+ * replacing any record already stored for that id.
+ *
+ * Returns 0 on success.  A packing failure is returned as is; a put
+ * failure is reported through __db_err and returned.  The temporary packed
+ * buffer is released on every path.
+ */
+int
+__put_txn_vrfy_info (lvinfo, txninfop)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	const VRFY_TXN_INFO *txninfop;
+{
+	int ret;
+	DBT key, data;
+
+	/*
+	 * Check the result for real: an assertion disappears in
+	 * non-diagnostic builds and data.data would then be NULL.
+	 */
+	if ((ret = __lv_pack_txn_vrfy_info(txninfop, &key, &data)) != 0)
+		return (ret);
+
+	if ((ret = __db_put(lvinfo->txninfo, lvinfo->ip, NULL,
+	    &key, &data, 0)) != 0) {
+		__lv_on_bdbop_err(ret);
+		__db_err(lvinfo->dbenv->env, ret, "\n%s",
+		    "__put_txn_vrfy_info");
+	}
+
+	/* key.data points into txninfop; only the packed buffer is ours. */
+	__os_free(lvinfo->dbenv->env, data.data);
+
+	return (ret);
+}
+
+/*
+ * Construct a key and data DBT from the structure.
+ *
+ * The data buffer is laid out as: the fixed-size head of the structure,
+ * the recycle LSN array, then for each updated file its size followed by
+ * its bytes.  The key points at txninfop->txnid (no copy); the data buffer
+ * is allocated here and must be freed by the caller.
+ */
+static int
+__lv_pack_txn_vrfy_info(txninfop, key, data)
+	const VRFY_TXN_INFO *txninfop;
+	DBT *key, *data;
+{
+	char *packed, *cursor;
+	size_t lsnbytes, total;
+	u_int32_t idx;
+	const DBT *file;
+	int ret;
+
+	memset(key, 0, sizeof(DBT));
+	memset(data, 0, sizeof(DBT));
+
+	total = TXN_VERIFY_INFO_TOTSIZE(*txninfop);
+	if ((ret = __os_malloc(NULL, total, &packed)) != 0)
+		return (ret);
+	memset(packed, 0, total);
+
+	/* Fixed-size head. */
+	memcpy(packed, txninfop, TXN_VERIFY_INFO_FIXSIZE);
+	cursor = packed + TXN_VERIFY_INFO_FIXSIZE;
+
+	/* Recycle LSN array. */
+	lsnbytes = sizeof(DB_LSN) * txninfop->num_recycle;
+	memcpy(cursor, txninfop->recycle_lsns, lsnbytes);
+	cursor += lsnbytes;
+
+	/* Each updated file: size, then that many bytes. */
+	for (idx = 0; idx < txninfop->filenum; idx++) {
+		file = &(txninfop->fileups[idx]);
+		memcpy(cursor, &(file->size), sizeof(file->size));
+		cursor += sizeof(file->size);
+		memcpy(cursor, file->data, file->size);
+		cursor += file->size;
+	}
+
+	key->data = (void *)&txninfop->txnid;
+	key->size = sizeof(txninfop->txnid);
+	data->data = packed;
+	data->size = (u_int32_t)total;
+	data->flags |= DB_DBT_MALLOC;
+
+	return (ret);
+}
+
+/*
+ * Calculate the number of bytes needed to store a DBT array: each element
+ * contributes its size field plus that many data bytes.
+ */
+static size_t
+__lv_dbt_arrsz(arr, arrlen)
+	const DBT *arr;
+	u_int32_t arrlen;
+{
+	size_t total;
+
+	total = 0;
+	while (arrlen-- > 0) {
+		total += arr->size + sizeof(arr->size);
+		arr++;
+	}
+
+	return (total);
+}
+
+/*
+ * __get_txn_vrfy_info --
+ * Get a VRFY_TXN_INFO object from db by txnid. Callers should free the
+ * object by calling __free_txninfo.
+ *
+ * PUBLIC: int __get_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *, u_int32_t,
+ * PUBLIC: VRFY_TXN_INFO **));
+ */
+/*
+ * Look up txnid in the txninfo database and unpack the record into a newly
+ * allocated VRFY_TXN_INFO.  Returns DB_NOTFOUND, leaving *txninfopp
+ * untouched, when there is no such record; other lookup failures are
+ * reported and returned by BDBOP3.
+ */
+int
+__get_txn_vrfy_info (lvinfo, txnid, txninfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	u_int32_t txnid;
+	VRFY_TXN_INFO **txninfopp;
+{
+	DBT key, data;
+	int ret;
+
+	memset(&data, 0, sizeof(DBT));
+	memset(&key, 0, sizeof(DBT));
+	key.size = sizeof(txnid);
+	key.data = &txnid;
+
+	BDBOP3(lvinfo->dbenv, __db_get(lvinfo->txninfo, lvinfo->ip, NULL,
+	    &key, &data, 0), DB_NOTFOUND, "__get_txn_vrfy_info");
+
+	/* Past the macro, ret is either 0 or DB_NOTFOUND. */
+	if (ret == DB_NOTFOUND)
+		return (ret);
+
+	return (__lv_unpack_txn_vrfy_info(txninfopp, &data));
+}
+
+/*
+ * Construct a structure from a DBT.
+ *
+ * data holds a record in the layout written by __lv_pack_txn_vrfy_info:
+ * the fixed-size head, num_recycle DB_LSNs, then filenum entries of a size
+ * followed by that many bytes.  Everything is deep-copied into newly
+ * allocated memory and the result is stored in *txninfopp.
+ *
+ * On failure nothing is leaked: all partial allocations are released and
+ * *txninfopp is not modified.
+ */
+static int
+__lv_unpack_txn_vrfy_info(txninfopp, data)
+	VRFY_TXN_INFO **txninfopp;
+	const DBT *data;
+{
+	size_t bufsz;
+	VRFY_TXN_INFO *buf, *txninfop;
+	DB_LSN *lsns, *p;
+	DBT *fileups;
+	u_int32_t i, sz;
+	char *pb, *q;
+	int ret;
+
+	ret = 0;
+	i = sz = 0;
+	buf = NULL;
+	p = NULL;
+	fileups = NULL;
+	q = NULL;
+	txninfop = (VRFY_TXN_INFO *)data->data;
+	lsns = (DB_LSN *)((char *)data->data + TXN_VERIFY_INFO_FIXSIZE);
+	pb = (char *)lsns + txninfop->num_recycle * sizeof(DB_LSN);
+
+	if ((ret = __os_malloc(NULL, bufsz = sizeof(VRFY_TXN_INFO), &buf)) != 0)
+		goto err;
+	memset(buf, 0, bufsz);
+	memcpy(buf, data->data, TXN_VERIFY_INFO_FIXSIZE);
+
+	if (txninfop->num_recycle != 0) {
+		if ((ret = __os_malloc(NULL,
+		    txninfop->num_recycle * sizeof(DB_LSN), &p)) != 0)
+			goto err;
+		memcpy(p, lsns, txninfop->num_recycle * sizeof(DB_LSN));
+		buf->recycle_lsns = p;
+	}
+
+	if (txninfop->filenum != 0) {
+		if ((ret = __os_malloc(NULL,
+		    txninfop->filenum * sizeof(DBT), &fileups)) != 0)
+			goto err;
+		/* Zeroed so that cleanup can tell which entries were filled. */
+		memset(fileups, 0, txninfop->filenum * sizeof(DBT));
+		buf->fileups = fileups;
+		for (i = 0; i < txninfop->filenum; i++) {
+			memcpy(&sz, pb, sizeof(sz));
+			pb += sizeof(sz);
+			if ((ret = __os_malloc(NULL, sz, &q)) != 0)
+				goto err;
+			memcpy(q, pb, sz);
+			pb += sz;
+
+			fileups[i].data = q;
+			fileups[i].size = sz;
+		}
+	}
+
+	*txninfopp = buf;
+
+	return (0);
+err:
+	/*
+	 * Release through the local pointers only, so the cleanup never
+	 * depends on what the fixed-size copy left in buf's members.
+	 */
+	if (fileups != NULL) {
+		for (i = 0; i < txninfop->filenum; i++)
+			if (fileups[i].data != NULL)
+				__os_free(NULL, fileups[i].data);
+		__os_free(NULL, fileups);
+	}
+	if (p != NULL)
+		__os_free(NULL, p);
+	if (buf != NULL)
+		__os_free(NULL, buf);
+
+	return (ret);
+}
+
+/*
+ * Append lsn to txninfop's array of recycle LSNs, growing the array by one
+ * element.  num_recycle is only updated once the array has actually been
+ * grown, so on failure the count still matches the array.
+ */
+static int
+__lv_add_recycle_lsn (txninfop, lsn)
+	VRFY_TXN_INFO *txninfop;
+	const DB_LSN *lsn;
+{
+	int ret;
+	u_int32_t newcnt;
+
+	newcnt = txninfop->num_recycle + 1;
+	if ((ret = __os_realloc(NULL, newcnt * sizeof(DB_LSN),
+	    &(txninfop->recycle_lsns))) != 0)
+		return (ret);
+
+	txninfop->recycle_lsns[newcnt - 1] = *lsn;
+	txninfop->num_recycle = newcnt;
+
+	return (0);
+}
+
+/*
+ * __add_recycle_lsn_range --
+ * Add recycle info for each txn within the recycled txnid range.
+ *
+ * PUBLIC: int __add_recycle_lsn_range __P((DB_LOG_VRFY_INFO *,
+ * PUBLIC: const DB_LSN *, u_int32_t, u_int32_t));
+ */
+/*
+ * For every stored transaction whose id lies in [min, max], record lsn as
+ * a recycle point and write the updated record back to the txninfo
+ * database.
+ *
+ * The records are updated in two phases: __iterate_txninfo collects the
+ * modified objects through __lv_add_recycle_handler, then they are stored
+ * with a separate cursor once the scan is finished.
+ *
+ * Returns 0 on success.  On failure the error is reported and returned;
+ * collected objects that were not written yet are freed.
+ */
+int
+__add_recycle_lsn_range(lvinfo, lsn, min, max)
+	DB_LOG_VRFY_INFO *lvinfo;
+	const DB_LSN *lsn;
+	u_int32_t min, max;
+{
+	DBC *csr;
+	int ret, tret;
+	u_int32_t done, i;
+	DBT key2, data2;
+	struct __add_recycle_params param;
+
+	csr = NULL;
+	ret = tret = 0;
+	done = 0;
+	memset(&key2, 0, sizeof(DBT));
+	memset(&data2, 0, sizeof(DBT));
+	memset(&param, 0, sizeof(param));
+
+	if ((ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_TXN_INFO *) *
+	    (param.ti2ul = 1024), &(param.ti2u))) != 0)
+		goto err;
+	param.ti2ui = 0;
+	param.recycle_lsn = *lsn;
+	param.min = min;
+	param.max = max;
+
+	/* Iterate the specified range and process each transaction. */
+	if ((ret = __iterate_txninfo(lvinfo, min, max, __lv_add_recycle_handler,
+	    &param)) != 0)
+		goto err;
+
+	/*
+	 * Save updated txninfo structures.  We can't do so in the above
+	 * iteration, so we have to save them here.
+	 */
+	BDBOP(__db_cursor(lvinfo->txninfo, lvinfo->ip, NULL, &csr, DBC_BULK));
+
+	for (i = 0; i < param.ti2ui; i++) {
+		if ((ret = __lv_pack_txn_vrfy_info(
+		    param.ti2u[i], &key2, &data2)) != 0)
+			goto err;
+		ret = __dbc_put(csr, &key2, &data2, DB_KEYLAST);
+		/*
+		 * key2.data refers to param.ti2u[i]'s memory.  data2.data
+		 * is the buffer allocated by the pack routine; it is still
+		 * ours after the put and is freed here, as
+		 * __put_txn_vrfy_info does.
+		 */
+		__os_free(lvinfo->dbenv->env, data2.data);
+		data2.data = NULL;
+		if (ret != 0) {
+			__lv_on_bdbop_err(ret);
+			goto err;
+		}
+
+		ret = __free_txninfo(param.ti2u[i]);
+		param.ti2u[i] = NULL;
+		done = i + 1;
+		if (ret != 0)
+			goto err;
+	}
+
+err:
+	if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0)
+		ret = tret;
+	/* Free the collected objects that were not written and released. */
+	if (param.ti2u != NULL)
+		for (i = done; i < param.ti2ui; i++)
+			(void)__free_txninfo(param.ti2u[i]);
+	__os_free(lvinfo->dbenv->env, param.ti2u);
+	if (ret != 0)
+		__db_err(lvinfo->dbenv->env, ret,
+		    "__add_recycle_lsn_range");
+
+	return (ret);
+}
+
+/*
+ * __iterate_txninfo --
+ * Iterate through the transaction info database as fast as possible,
+ * and process each key/data pair using a callback handler. Break the
+ * iteration if the handler returns non-zero values.
+ *
+ * Records are fetched with bulk cursor gets into a buffer that starts at
+ * 64KB (rounded down to a multiple of the database page size) and is
+ * doubled whenever a get reports DB_BUFFER_SMALL. A record is unpacked
+ * and passed to the handler when min <= txnid <= max with both bounds
+ * non-zero, or when min and max are both zero (no range specified).
+ * Reaching the end of the database (DB_NOTFOUND) is reported as success.
+ * NOTE(review): the unpacked txninfo object is not freed in this function,
+ * so the handler presumably takes ownership of it -- confirm for each
+ * handler.
+ *
+ * PUBLIC: int __iterate_txninfo __P((DB_LOG_VRFY_INFO *, u_int32_t,
+ * PUBLIC: u_int32_t, TXNINFO_HANDLER, void *));
+ */
+int
+__iterate_txninfo(lvinfo, min, max, handler, param)
+ DB_LOG_VRFY_INFO *lvinfo;
+ u_int32_t min, max;
+ TXNINFO_HANDLER handler;
+ void *param;
+{
+ ENV *env;
+ VRFY_TXN_INFO *txninfop;
+ int ret, tret;
+ u_int32_t bufsz, pgsz, txnid;
+ size_t retkl, retdl;
+ char *btbuf;
+ u_int8_t *retk, *retd;
+ DBT key, data, data2;
+ DBC *csr;
+ void *p;
+
+ csr = NULL;
+ env = lvinfo->dbenv->env;
+ txninfop = NULL;
+ ret = tret = 0;
+ txnid = 0;
+ retkl = retdl = 0;
+ bufsz = 64 * 1024;
+ btbuf = NULL;
+ retk = retd = NULL;
+
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ memset(&data2, 0, sizeof(DBT));
+
+ pgsz = lvinfo->txninfo->pgsize;
+ DB_ASSERT(env, ret == 0);
+
+ if (bufsz % pgsz != 0)
+ bufsz = pgsz * (bufsz / pgsz);
+
+ if ((ret = __os_malloc(env, bufsz, &btbuf)) != 0)
+ goto err;
+
+ BDBOP(__db_cursor(lvinfo->txninfo, lvinfo->ip, NULL, &csr, DBC_BULK));
+
+ /*
+ * Use bulk retrieval to scan the database as fast as possible.
+ */
+ data.data = btbuf;
+ data.ulen = bufsz;
+ data.flags |= DB_DBT_USERMEM;
+
+ for (ret = __dbc_get(csr, &key, &data, DB_FIRST | DB_MULTIPLE_KEY) ;;
+ ret = __dbc_get(csr, &key, &data, DB_NEXT | DB_MULTIPLE_KEY)) {
+ switch (ret) {
+ case 0:
+ break;
+ case DB_NOTFOUND:
+ goto out;
+ /* No break statement allowed by lint here. */
+ case DB_BUFFER_SMALL:
+ if ((ret = __os_realloc(lvinfo->dbenv->env,
+ bufsz *= 2, &btbuf)) != 0)
+ goto out;
+ data.ulen = bufsz;
+ data.data = btbuf;
+ continue;/* Continue the for-loop. */
+ /* No break statement allowed by lint here. */
+ default:
+ goto err;
+ }
+
+ /*
+ * Do bulk get. Some txninfo objects may be updated by the
+ * handler, but we can't store them immediately in the same
+ * loop because we wouldn't be able to continue the bulk get
+ * using the same cursor; and we can't use another cursor
+ * otherwise we may self-block. In the handler we need to
+ * store the updated objects and store them to db when we get
+ * out of this loop.
+ */
+ DB_MULTIPLE_INIT(p, &data);
+ while (1) {
+ DB_MULTIPLE_KEY_NEXT(p, &data,
+ retk, retkl, retd, retdl);
+ if (p == NULL)
+ break;
+ DB_ASSERT(env, retkl == sizeof(txnid) && retk != NULL);
+ memcpy(&txnid, retk, retkl);
+ /*
+ * Process it if txnid in range or no range specified.
+ * The range must be a closed one.
+ */
+ if ((min != 0 && txnid >= min && max != 0 &&
+ txnid <= max) || (min == 0 && max == 0)) {
+ data2.data = retd;
+ data2.size = (u_int32_t)retdl;
+
+ if ((ret = __lv_unpack_txn_vrfy_info(
+ &txninfop, &data2)) != 0)
+ goto out;
+ if ((ret = handler(lvinfo, txninfop,
+ param)) != 0)
+ /* Stop the iteration on error. */
+ goto out;
+ }
+ }
+
+ }
+out:
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+err:
+ if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0)
+ ret = tret;
+ __os_free(lvinfo->dbenv->env, btbuf);
+ return (ret);
+}
+
+/*
+ * Txninfo iteration handler to add recycle info for affected txns.
+ *
+ * params is a struct __add_recycle_params.  This function takes ownership
+ * of txninfop: it is either freed here (txnid outside the recycled range,
+ * or the update failed) or appended to param->ti2u so the caller can write
+ * it back to the database later.
+ */
+static int
+__lv_add_recycle_handler(lvinfo, txninfop, params)
+	DB_LOG_VRFY_INFO *lvinfo;
+	VRFY_TXN_INFO *txninfop;
+	void *params;
+{
+	int ret;
+	struct __add_recycle_params *param;
+
+	ret = 0;
+	param = (struct __add_recycle_params *)params;
+
+	/*
+	 * If the txnid is reused, update its recycle info and note it for
+	 * later update, otherwise free the txninfop structure.  An id is
+	 * outside the closed range when it is below min OR above max.  As
+	 * in __iterate_txninfo, min == max == 0 means no range was given
+	 * and every transaction is affected.
+	 */
+	if ((param->min != 0 || param->max != 0) &&
+	    (txninfop->txnid < param->min || txninfop->txnid > param->max))
+		return (__free_txninfo(txninfop));
+
+	if ((ret = __lv_add_recycle_lsn(
+	    txninfop, &(param->recycle_lsn))) != 0) {
+		/* Not handed over to ti2u yet, so it must be freed here. */
+		(void)__free_txninfo(txninfop);
+		return (ret);
+	}
+
+	/*
+	 * Whether a txn was aborted is not derived here: a separate
+	 * backward pass over the log provides all the txn lifetimes.
+	 */
+	if (txninfop->status == TXN_STAT_PREPARE) {
+		__db_errx(lvinfo->dbenv->env,
+		    "[ERROR] Transaction with ID %u is prepared and not "
+		    "committed, but its ID is recycled by log record [%u, %u].",
+		    txninfop->txnid, param->recycle_lsn.file,
+		    param->recycle_lsn.offset);
+	}
+
+	/* Note down to store later; grow the array once it is full. */
+	param->ti2u[(param->ti2ui)++] = txninfop;
+	if (param->ti2ui == param->ti2ul)
+		BDBOP(__os_realloc(lvinfo->dbenv->env,
+		    sizeof(VRFY_TXN_INFO *) * (param->ti2ul *= 2),
+		    &(param->ti2u)));
+err:
+	return (ret);
+}
+/*
+ * PUBLIC: int __rem_last_recycle_lsn __P((VRFY_TXN_INFO *));
+ */
+/*
+ * Drop the most recently added recycle LSN of txninfop.  Nothing happens
+ * if there is none; removing the only one frees the array.
+ */
+int
+__rem_last_recycle_lsn(txninfop)
+	VRFY_TXN_INFO *txninfop;
+{
+	int ret;
+
+	ret = 0;
+	if (txninfop->num_recycle == 0)
+		return (0);
+
+	if (--txninfop->num_recycle == 0) {
+		/* That was the last element. */
+		__os_free(NULL, txninfop->recycle_lsns);
+		txninfop->recycle_lsns = NULL;
+		return (0);
+	}
+
+	/* Shrink the array to the remaining elements. */
+	BDBOP(__os_realloc(NULL, txninfop->num_recycle * sizeof(DB_LSN),
+	    &(txninfop->recycle_lsns)));
+err:
+	return (ret);
+}
+
+/*
+ * __add_file_updated --
+ * Add a file's dbregid and uid to the updating txn if it's not yet
+ * recorded.
+ *
+ * PUBLIC: int __add_file_updated __P((VRFY_TXN_INFO *, const DBT *, int32_t));
+ */
+/*
+ * Record fileid/dbregid in txninfop's list of updated files.  A fileid
+ * that is already in the list (same size and bytes) is left alone and 0 is
+ * returned.  Otherwise both arrays grow by one and the new slot receives a
+ * private copy of the fileid bytes.
+ */
+int
+__add_file_updated (txninfop, fileid, dbregid)
+	VRFY_TXN_INFO *txninfop;
+	const DBT *fileid;
+	int32_t dbregid;
+{
+	DBT *slot;
+	u_int32_t idx;
+	int ret;
+
+	ret = 0;
+
+	/* Nothing to do if the file is already recorded. */
+	for (idx = 0; idx < txninfop->filenum; idx++) {
+		slot = &(txninfop->fileups[idx]);
+		if (slot->size == fileid->size &&
+		    memcmp(slot->data, fileid->data, slot->size) == 0)
+			return (0);
+	}
+
+	/* Add file's uid into the array, deep copy from fileid. */
+	txninfop->filenum++;
+	if ((ret = __os_realloc(NULL, txninfop->filenum *
+	    sizeof(DBT), &(txninfop->fileups))) != 0)
+		goto err;
+
+	slot = &(txninfop->fileups[txninfop->filenum - 1]);
+	memset(slot, 0, sizeof(DBT));
+	slot->size = fileid->size;
+	if ((ret = __os_malloc(NULL, slot->size, &(slot->data))) != 0)
+		goto err;
+	memcpy(slot->data, fileid->data, fileid->size);
+
+	/* Add file dbregid into the array. */
+	BDBOP(__os_realloc(NULL, txninfop->filenum *
+	    sizeof(int32_t), &(txninfop->dbregid)));
+	txninfop->dbregid[txninfop->filenum - 1] = dbregid;
+err:
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __del_file_updated __P((VRFY_TXN_INFO *, const DBT *));
+ */
+/*
+ * Remove the entry identical to fileid (same size and bytes) from
+ * txninfop's list of updated files, together with the matching dbregid.
+ * fileid itself is not modified.  Returns 0 when no such entry exists.
+ *
+ * The removed entry's data buffer is freed before the arrays are shrunk,
+ * so it cannot leak if a shrinking realloc fails.
+ */
+int
+__del_file_updated (txninfop, fileid)
+	VRFY_TXN_INFO *txninfop;
+	const DBT *fileid;
+{
+	u_int32_t i;
+	int ret;
+	DBT *p;
+	void *pdbtdata;
+
+	ret = 0;
+
+	if (txninfop->filenum == 0)
+		return (0);
+
+	/* Locate the element identical to fileid, if there is one. */
+	for (i = 0; i < txninfop->filenum; i++) {
+		p = &(txninfop->fileups[i]);
+		if (p->size == fileid->size &&
+		    memcmp(p->data, fileid->data, p->size) == 0)
+			break;
+	}
+	if (i == txninfop->filenum)
+		return (0);
+
+	pdbtdata = txninfop->fileups[i].data;
+	if (txninfop->filenum > 1) {
+		/* Close the gap in both parallel arrays. */
+		memmove(txninfop->fileups + i, txninfop->fileups + i + 1,
+		    sizeof(DBT) * (txninfop->filenum - (i + 1)));
+		memmove(txninfop->dbregid + i, txninfop->dbregid + i + 1,
+		    sizeof(int32_t) * (txninfop->filenum - (i + 1)));
+	} else {
+		/* It was the only element: drop the arrays. */
+		__os_free(NULL, txninfop->fileups);
+		__os_free(NULL, txninfop->dbregid);
+		txninfop->fileups = NULL;
+		txninfop->dbregid = NULL;
+	}
+	txninfop->filenum--;
+	__os_free(NULL, pdbtdata);
+
+	if (txninfop->filenum) {
+		BDBOP(__os_realloc(NULL, sizeof(DBT) *
+		    txninfop->filenum, &(txninfop->fileups)));
+		BDBOP(__os_realloc(NULL, sizeof(int32_t) *
+		    txninfop->filenum, &(txninfop->dbregid)));
+	}
+err:
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __clear_fileups __P((VRFY_TXN_INFO *));
+ */
+/*
+ * Forget every updated file recorded in txninfop: free each entry's data,
+ * free both arrays and reset the count.  Always returns 0.
+ */
+int
+__clear_fileups(txninfop)
+	VRFY_TXN_INFO *txninfop;
+{
+	u_int32_t idx;
+
+	idx = 0;
+	while (idx < txninfop->filenum) {
+		__os_free(NULL, txninfop->fileups[idx].data);
+		idx++;
+	}
+
+	__os_free(NULL, txninfop->fileups);
+	txninfop->fileups = NULL;
+	__os_free(NULL, txninfop->dbregid);
+	txninfop->dbregid = NULL;
+	txninfop->filenum = 0;
+
+	return (0);
+}
+
+/*
+ * __free_txninfo_stack --
+ * The object is on stack, only free its internal memory, not itself.
+ * PUBLIC: int __free_txninfo_stack __P((VRFY_TXN_INFO *));
+ */
+/*
+ * Release the memory owned by *p (the updated-file entries and both file
+ * arrays, and the recycle LSN array) but not p itself.  The members are
+ * not reset.  A NULL p is accepted.  Always returns 0.
+ */
+int
+__free_txninfo_stack (p)
+	VRFY_TXN_INFO *p;
+{
+	u_int32_t idx;
+
+	if (p == NULL)
+		return (0);
+
+	if (p->fileups != NULL) {
+		idx = 0;
+		while (idx < p->filenum) {
+			__os_free(NULL, p->fileups[idx].data);
+			idx++;
+		}
+		__os_free(NULL, p->fileups);
+	}
+	if (p->dbregid != NULL) {
+		__os_free(NULL, p->dbregid);
+	}
+	if (p->recycle_lsns != NULL) {
+		__os_free(NULL, p->recycle_lsns);
+	}
+
+	return (0);
+}
+/*
+ * PUBLIC: int __free_txninfo __P((VRFY_TXN_INFO *));
+ */
+/*
+ * Free a heap-allocated VRFY_TXN_INFO: first the memory it owns, then the
+ * structure itself.  Always returns 0.
+ */
+int
+__free_txninfo(p)
+	VRFY_TXN_INFO *p;
+{
+	/* The helper's return value carries no information. */
+	(void)__free_txninfo_stack(p);
+
+	__os_free(NULL, p);
+	return (0);
+}
+
+/*
+ * Construct a data DBT from the structure.
+ *
+ * The buffer is laid out as: the fixed-size head of the structure, the
+ * regcnt dbregids, the size of the file id, the file id bytes, then the
+ * NUL-terminated file name.  The buffer is allocated here and must be
+ * freed by the caller.
+ */
+static int
+__lv_pack_filereg(freginfo, data)
+	const VRFY_FILEREG_INFO *freginfo;
+	DBT *data;
+{
+	char *packed, *cursor;
+	size_t idbytes, total;
+	int ret;
+
+	total = FILE_REG_INFO_TOTSIZE(*freginfo);
+	if ((ret = __os_malloc(NULL, total, &packed)) != 0)
+		return (ret);
+	memset(packed, 0, total);
+
+	/* Fixed-size head. */
+	memcpy(packed, freginfo, FILE_REG_INFO_FIXSIZE);
+	cursor = packed + FILE_REG_INFO_FIXSIZE;
+
+	/* The dbregid array. */
+	idbytes = sizeof(int32_t) * freginfo->regcnt;
+	memcpy(cursor, freginfo->dbregids, idbytes);
+	cursor += idbytes;
+
+	/* The file id: size, then bytes. */
+	memcpy(cursor, &(freginfo->fileid.size), sizeof(freginfo->fileid.size));
+	cursor += sizeof(freginfo->fileid.size);
+	memcpy(cursor, freginfo->fileid.data, freginfo->fileid.size);
+	cursor += freginfo->fileid.size;
+
+	/* The file name comes last. */
+	(void)strcpy(cursor, freginfo->fname);
+
+	data->data = packed;
+	data->size = (u_int32_t)total;
+
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __put_filereg_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC: const VRFY_FILEREG_INFO *));
+ */
+/*
+ * Pack freginfo and store it in the fileregs database keyed by its file
+ * id.  Returns 0 on success.  A put failure is reported through __db_err
+ * and returned.  The temporary packed buffer is released on every path.
+ */
+int
+__put_filereg_info (lvinfo, freginfo)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	const VRFY_FILEREG_INFO *freginfo;
+{
+	int ret;
+	DBT data;
+
+	memset(&data, 0, sizeof(DBT));
+
+	if ((ret = __lv_pack_filereg(freginfo, &data)) != 0)
+		goto err;
+
+	/*
+	 * We store dbregid-filereg map into dbregids.db, but we can't make
+	 * dbregids.db the sec db of fileregs.db, because dbregid is only
+	 * valid when a db file is open, we want to delete data with same
+	 * key in dbregids.db, but we want to keep all filereg_info data in
+	 * fileregs.db to track all db file lifetime and status.
+	 *
+	 * Consequently we will store dbregid-file_uid in dbregs.db, so that we
+	 * can delete dbregid when the db handle is closed, and we can
+	 * use the dbregid to get the currently open db file's uid.
+	 */
+
+	/*
+	 * Handle the failure here instead of returning from inside a macro,
+	 * so that control always reaches the cleanup below.
+	 */
+	if ((ret = __db_put(lvinfo->fileregs, lvinfo->ip, NULL,
+	    (DBT *)&(freginfo->fileid), &data, 0)) != 0) {
+		__lv_on_bdbop_err(ret);
+		__db_err(lvinfo->dbenv->env, ret, "\n%s",
+		    "__put_filereg_info");
+	}
+
+err:
+	if (data.data != NULL)
+		__os_free(lvinfo->dbenv->env, data.data);
+
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __del_filelife __P((const DB_LOG_VRFY_INFO *, int32_t));
+ */
+/*
+ * Delete the record keyed by dbregid from the dbregids database and
+ * return the result of the delete.
+ */
+int
+__del_filelife(lvinfo, dbregid)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	int32_t dbregid;
+{
+	DBT key;
+
+	memset(&key, 0, sizeof(DBT));
+	key.size = sizeof(dbregid);
+	key.data = &(dbregid);
+
+	return (__db_del(lvinfo->dbregids, lvinfo->ip, NULL, &key, 0));
+}
+
+/*
+ * PUBLIC: int __put_filelife __P((const DB_LOG_VRFY_INFO *, VRFY_FILELIFE *));
+ */
+/*
+ * Store *pflife in the dbregids database keyed by its dbregid member and
+ * return the result of the put.
+ */
+int
+__put_filelife (lvinfo, pflife)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	VRFY_FILELIFE *pflife;
+{
+	DBT key, data;
+
+	memset(&key, 0, sizeof(DBT));
+	key.size = sizeof(pflife->dbregid);
+	key.data = &(pflife->dbregid);
+
+	/* The structure is stored as is, as a flat block of bytes. */
+	memset(&data, 0, sizeof(DBT));
+	data.size = sizeof(VRFY_FILELIFE);
+	data.data = pflife;
+
+	return (__db_put(lvinfo->dbregids, lvinfo->ip, NULL,
+	    &key, &data, 0));
+}
+
+/*
+ * PUBLIC: int __get_filelife __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC: int32_t, VRFY_FILELIFE **));
+ */
+/*
+ * Fetch the VRFY_FILELIFE stored for dbregid in the dbregids database and
+ * return a newly allocated copy in *flifepp.  On failure the error code of
+ * the lookup or of the allocation is returned and *flifepp is untouched.
+ */
+int
+__get_filelife (lvinfo, dbregid, flifepp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	int32_t dbregid;
+	VRFY_FILELIFE **flifepp;
+{
+	DBT key, data;
+	VRFY_FILELIFE *copy;
+	int ret;
+
+	copy = NULL;
+	memset(&data, 0, sizeof(DBT));
+	memset(&key, 0, sizeof(DBT));
+	key.size = sizeof(dbregid);
+	key.data = &dbregid;
+
+	if ((ret = __db_get(lvinfo->dbregids, lvinfo->ip, NULL,
+	    &key, &data, 0)) != 0)
+		return (ret);
+
+	if ((ret = __os_malloc(lvinfo->dbenv->env,
+	    sizeof(VRFY_FILELIFE), &copy)) != 0)
+		return (ret);
+	DB_ASSERT(lvinfo->dbenv->env, copy != NULL);
+
+	/* Hand back a private copy of the stored record. */
+	memcpy(copy, data.data, sizeof(VRFY_FILELIFE));
+	*flifepp = copy;
+
+	return (ret);
+}
+
+/*
+ * __get_filereg_by_dbregid --
+ *	Two-step lookup: find the VRFY_FILELIFE stored under dbregid in the
+ *	dbregids database, then use its file uid as the key into the fileregs
+ *	database and unpack the record found there into *freginfopp.
+ *	DB_NOTFOUND from either lookup is returned to the caller as is.
+ *
+ * PUBLIC: int __get_filereg_by_dbregid __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC:     int32_t, VRFY_FILEREG_INFO **));
+ */
+int
+__get_filereg_by_dbregid(lvinfo, dbregid, freginfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	int32_t dbregid;
+	VRFY_FILEREG_INFO **freginfopp;
+{
+	DBT key, data;
+	VRFY_FILELIFE *life;
+	char fuid[DB_FILE_ID_LEN];
+	int ret;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	key.data = &dbregid;
+	key.size = sizeof(dbregid);
+
+	BDBOP3(lvinfo->dbenv, __db_get(lvinfo->dbregids, lvinfo->ip, NULL,
+	    &key, &data, 0), DB_NOTFOUND, "__get_filereg_by_dbregid");
+	if (ret == DB_NOTFOUND)
+		goto err;
+
+	/*
+	 * Copy the file uid out of the returned record before the next get,
+	 * then reuse the key DBT to address fileregs.db with it.
+	 */
+	life = (VRFY_FILELIFE *)data.data;
+	memcpy((void *)fuid, (void *)life->fileid, DB_FILE_ID_LEN);
+	key.size = DB_FILE_ID_LEN;
+	key.data = (void *)fuid;
+	memset(&data, 0, sizeof(DBT));
+
+	BDBOP3(lvinfo->dbenv, __db_get(lvinfo->fileregs, lvinfo->ip, NULL,
+	    &key, &data, 0), DB_NOTFOUND, "__get_filereg_by_dbregid");
+	if (ret == DB_NOTFOUND)
+		goto err;
+
+	ret = __lv_unpack_filereg(&data, freginfopp);
+
+err:
+	return (ret);
+}
+
+/*
+ * __add_dbregid --
+ *	Apply a dbreg operation for dbregid to the dbregids array of the file
+ *	registration entry freg, and record the id's VRFY_FILELIFE when the
+ *	operation is an open.
+ *
+ *	*addp is always written before returning and reports the outcome:
+ *	   2	dbregid is already in the array and opcode is an open other
+ *		than DBREG_CHKPNT/DBREG_XCHKPNT; nothing is changed and the
+ *		return value is still 0.
+ *	   1	opcode satisfies IS_DBREG_OPEN: dbregid was appended to the
+ *		array if it was absent, and its life record was written with
+ *		__put_filelife.
+ *	  -1	opcode satisfies IS_DBREG_CLOSE and dbregid was found: it was
+ *		removed from the array.  The dbregids database is not touched.
+ *	   0	nothing was done.
+ *
+ *	Returns 0, or the error from __os_realloc or __put_filelife.
+ *
+ * PUBLIC: int __add_dbregid __P((DB_LOG_VRFY_INFO *, VRFY_FILEREG_INFO *,
+ * PUBLIC:     int32_t, u_int32_t, DB_LSN, DBTYPE, db_pgno_t, int *));
+ */
+int
+__add_dbregid(lvh, freg, dbregid, opcode, lsn, dbtype, meta_pgno, addp)
+	DB_LOG_VRFY_INFO *lvh;
+	VRFY_FILEREG_INFO *freg;
+	int32_t dbregid;
+	u_int32_t opcode;
+	DB_LSN lsn;
+	DBTYPE dbtype;
+	db_pgno_t meta_pgno;
+	int *addp;
+{
+	int inarray, ret, tret;
+	u_int32_t i, j;
+	VRFY_FILELIFE flife;
+
+	inarray = ret = tret = 0;
+	for (i = 0; i < freg->regcnt; i++) {
+		if (freg->dbregids[i] == dbregid) {
+			if (!IS_DBREG_CLOSE(opcode)) {
+				/* Opening an open dbreg id. */
+				if (IS_DBREG_OPEN(opcode) &&
+				    (opcode != DBREG_CHKPNT &&
+				    opcode != DBREG_XCHKPNT)) {
+					tret = 2;
+					goto err;
+				}
+				tret = 0;
+				inarray = 1;
+			} else
+				/* Found the dbregid; gonna remove it. */
+				tret = -1;
+			break;
+		}
+	}
+
+	/*
+	 * Any open that got this far is recorded: either the id is new and
+	 * is appended, or it is a checkpoint re-open of a known id and only
+	 * its life record is refreshed.
+	 */
+	if (IS_DBREG_OPEN(opcode))
+		tret = 1;
+
+	/*
+	 * Remove closed dbregid.  dbregid can be recycled, not unique to a db
+	 * file, it's dynamically allocated for each db handle.
+	 */
+	if (tret == -1) {
+		for (j = i; j < freg->regcnt - 1; j++)
+			freg->dbregids[j] = freg->dbregids[j + 1];
+		freg->regcnt--;
+		BDBOP(__os_realloc(lvh->dbenv->env,
+		    sizeof(int32_t) * freg->regcnt, &(freg->dbregids)));
+		/* Don't remove dbregid life info from dbregids db. */
+	} else if (tret == 1) {
+		if (!inarray) {
+			/*
+			 * Grow the array first and bump the count only once
+			 * that has succeeded, so a failed realloc never
+			 * leaves regcnt larger than the array it describes.
+			 */
+			BDBOP(__os_realloc(lvh->dbenv->env,
+			    sizeof(int32_t) * (freg->regcnt + 1),
+			    &(freg->dbregids)));
+			freg->dbregids[freg->regcnt] = dbregid;
+			freg->regcnt++;
+		}
+		/*
+		 * The structure is stored byte-for-byte by __put_filelife,
+		 * so clear it first: padding and any file id bytes beyond
+		 * freg->fileid.size must not be stack garbage.
+		 */
+		memset(&flife, 0, sizeof(flife));
+		flife.dbregid = dbregid;
+		memcpy(flife.fileid, freg->fileid.data, freg->fileid.size);
+		flife.lifetime = opcode;
+		flife.dbtype = dbtype;
+		flife.lsn = lsn;
+		flife.meta_pgno = meta_pgno;
+		if ((ret = __put_filelife(lvh, &flife)) != 0)
+			goto err;
+	}
+
+err:
+	*addp = tret;
+	return (ret);
+
+}
+
+/*
+ * __get_filereg_info --
+ *	Fetch the record stored under the file uid fuid in the fileregs
+ *	database and unpack it into *freginfopp.  DB_NOTFOUND is returned to
+ *	the caller unchanged when no such record exists.
+ *
+ * PUBLIC: int __get_filereg_info __P((const DB_LOG_VRFY_INFO *, const DBT *,
+ * PUBLIC:     VRFY_FILEREG_INFO **));
+ */
+int
+__get_filereg_info(lvinfo, fuid, freginfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	const DBT *fuid;
+	VRFY_FILEREG_INFO **freginfopp;
+{
+	DBT packed;
+	int ret;
+
+	memset(&packed, 0, sizeof(DBT));
+
+	BDBOP3(lvinfo->dbenv, __db_get(lvinfo->fileregs, lvinfo->ip, NULL,
+	    (DBT *)fuid, &packed, 0), DB_NOTFOUND, "__get_filereg_info");
+	if (ret == DB_NOTFOUND)
+		goto err;
+
+	ret = __lv_unpack_filereg(&packed, freginfopp);
+
+err:
+	return (ret);
+}
+
+/*
+ * __lv_unpack_filereg --
+ *	Rebuild a VRFY_FILEREG_INFO from the packed record in data.  The
+ *	record is consumed in this order: FILE_REG_INFO_FIXSIZE bytes copied
+ *	over the head of the structure (regcnt is read from them), regcnt
+ *	int32_t dbregids, a u_int32_t file id length, that many file id
+ *	bytes, and a NUL-terminated file name.
+ *
+ *	On success the new structure is stored in *freginfopp; it and its
+ *	dbregids, fileid.data and fname members are separate __os_malloc
+ *	allocations.  On failure everything allocated so far is released,
+ *	*freginfopp is set to NULL and the allocation error is returned, so
+ *	the caller never sees a partially built structure.
+ */
+static int
+__lv_unpack_filereg(data, freginfopp)
+	const DBT *data;
+	VRFY_FILEREG_INFO **freginfopp;
+{
+	VRFY_FILEREG_INFO *freg;
+	int32_t *ids;
+	char *fid, *fname, *p;
+	u_int32_t arrsz, fidsz;
+	int ret;
+
+	freg = NULL;
+	ids = NULL;
+	fid = fname = p = NULL;
+	arrsz = fidsz = 0;
+
+	if ((ret = __os_malloc(NULL, sizeof(VRFY_FILEREG_INFO), &freg)) != 0)
+		goto err;
+	memset(freg, 0, sizeof(VRFY_FILEREG_INFO));
+
+	/* Fixed-size head of the record. */
+	memcpy(freg, data->data, FILE_REG_INFO_FIXSIZE);
+	p = ((char *)(data->data)) + FILE_REG_INFO_FIXSIZE;
+
+	/* The dbregid array. */
+	arrsz = freg->regcnt * sizeof(int32_t);
+	if ((ret = __os_malloc(NULL, arrsz, &ids)) != 0)
+		goto err;
+	memcpy(ids, p, arrsz);
+	p += arrsz;
+
+	/* Length-prefixed file id. */
+	memcpy(&fidsz, p, sizeof(fidsz));
+	p += sizeof(fidsz);
+	if ((ret = __os_malloc(NULL, fidsz, &fid)) != 0)
+		goto err;
+	memcpy(fid, p, fidsz);
+	p += fidsz;
+
+	/* NUL-terminated file name. */
+	if ((ret = __os_malloc(NULL,
+	    sizeof(char) * (strlen(p) + 1), &fname)) != 0)
+		goto err;
+	(void)strcpy(fname, p);
+
+	/* Everything is in hand: assemble and publish. */
+	freg->dbregids = ids;
+	freg->fileid.data = fid;
+	freg->fileid.size = fidsz;
+	freg->fname = fname;
+	*freginfopp = freg;
+	return (0);
+
+err:
+	/* fname is the last allocation, so it can never need freeing here. */
+	if (fid != NULL)
+		__os_free(NULL, fid);
+	if (ids != NULL)
+		__os_free(NULL, ids);
+	if (freg != NULL)
+		__os_free(NULL, freg);
+	*freginfopp = NULL;
+	return (ret);
+}
+
+/*
+ * __free_filereg_info --
+ *	Release a VRFY_FILEREG_INFO together with its fname, fileid.data and
+ *	dbregids members.  A NULL argument is accepted.  Always returns 0.
+ *
+ * PUBLIC: int __free_filereg_info __P((VRFY_FILEREG_INFO *));
+ */
+int
+__free_filereg_info(p)
+	VRFY_FILEREG_INFO *p;
+{
+	if (p != NULL) {
+		if (p->fname != NULL)
+			__os_free(NULL, (void *)(p->fname));
+		if (p->fileid.data != NULL)
+			__os_free(NULL, p->fileid.data);
+		if (p->dbregids != NULL)
+			__os_free(NULL, p->dbregids);
+		__os_free(NULL, p);
+	}
+
+	return (0);
+}
+
+/*
+ * __get_ckp_info --
+ *	Look up the checkpoint record stored under lsn in the ckps database
+ *	and return a copy of it through ckpinfopp.  The copy comes from
+ *	__os_malloc and is not released here.  DB_NOTFOUND is returned
+ *	unchanged when there is no record for lsn.
+ *
+ * PUBLIC: int __get_ckp_info __P((const DB_LOG_VRFY_INFO *, DB_LSN,
+ * PUBLIC:     VRFY_CKP_INFO **));
+ */
+int
+__get_ckp_info(lvinfo, lsn, ckpinfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	DB_LSN lsn;
+	VRFY_CKP_INFO **ckpinfopp;
+{
+	DBT key, data;
+	VRFY_CKP_INFO *copy;
+	int ret;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	key.data = &lsn;
+	key.size = sizeof(DB_LSN);
+
+	BDBOP3(lvinfo->dbenv, __db_get(lvinfo->ckps, lvinfo->ip, NULL,
+	    &key, &data, 0), DB_NOTFOUND, "__get_ckp_info");
+	if (ret == DB_NOTFOUND)
+		goto err;
+
+	ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_CKP_INFO), &copy);
+	if (ret != 0)
+		goto err;
+	memcpy(copy, data.data, sizeof(VRFY_CKP_INFO));
+	*ckpinfopp = copy;
+
+err:
+	return (ret);
+}
+
+/*
+ * __get_last_ckp_info --
+ *	Return, through ckpinfopp, a copy of the last record in the ckps
+ *	database as positioned by a DB_LAST cursor get.  The copy comes from
+ *	__os_malloc and is not released here.  DB_NOTFOUND is returned
+ *	silently; any other failure is also reported with __db_err.
+ *
+ * PUBLIC: int __get_last_ckp_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC:     VRFY_CKP_INFO **));
+ */
+int
+__get_last_ckp_info(lvinfo, ckpinfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	VRFY_CKP_INFO **ckpinfopp;
+{
+	DBC *csr;
+	DBT key, data;
+	VRFY_CKP_INFO *copy;
+	int ret, tret;
+
+	csr = NULL;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+
+	BDBOP(__db_cursor(lvinfo->ckps, lvinfo->ip, NULL, &csr, 0));
+
+	ret = __dbc_get(csr, &key, &data, DB_LAST);
+	if (ret != 0)
+		goto err;
+
+	ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_CKP_INFO), &copy);
+	if (ret != 0)
+		goto err;
+	DB_ASSERT(lvinfo->dbenv->env, sizeof(VRFY_CKP_INFO) == data.size);
+	memcpy(copy, data.data, sizeof(VRFY_CKP_INFO));
+	*ckpinfopp = copy;
+
+err:
+	/* A close failure is only reported if nothing failed earlier. */
+	if (csr != NULL) {
+		tret = __dbc_close(csr);
+		if (tret != 0 && ret == 0)
+			ret = tret;
+	}
+	if (ret != 0 && ret != DB_NOTFOUND)
+		__db_err(lvinfo->dbenv->env, ret, "__get_last_ckp_info");
+	return (ret);
+}
+
+/*
+ * __put_ckp_info --
+ *	Store *ckpinfo in the ckps database, keyed by ckpinfo->lsn.  The
+ *	whole VRFY_CKP_INFO structure is written as the data item.
+ *
+ * PUBLIC: int __put_ckp_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC:     const VRFY_CKP_INFO *));
+ */
+int
+__put_ckp_info(lvinfo, ckpinfo)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	const VRFY_CKP_INFO *ckpinfo;
+{
+	DBT lsn_key, ckp_data;
+	int ret;
+
+	memset(&lsn_key, 0, sizeof(DBT));
+	lsn_key.data = (void *)&ckpinfo->lsn;
+	lsn_key.size = sizeof(DB_LSN);
+
+	memset(&ckp_data, 0, sizeof(DBT));
+	ckp_data.data = (void *)ckpinfo;
+	ckp_data.size = sizeof(VRFY_CKP_INFO);
+
+	BDBOP2(lvinfo->dbenv, __db_put(lvinfo->ckps, lvinfo->ip,
+	    NULL, &lsn_key, &ckp_data, 0), "__put_ckp_info");
+	return (0);
+}
+
+/*
+ * __get_timestamp_info --
+ *	Look up the timestamp record stored under lsn in the lsntime database
+ *	and return a copy of it through tsinfopp.  The copy comes from
+ *	__os_malloc and is not released here.  DB_NOTFOUND is returned
+ *	unchanged when there is no record for lsn.
+ *
+ * PUBLIC: int __get_timestamp_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC:     DB_LSN, VRFY_TIMESTAMP_INFO **));
+ */
+int
+__get_timestamp_info(lvinfo, lsn, tsinfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	DB_LSN lsn;
+	VRFY_TIMESTAMP_INFO **tsinfopp;
+{
+	DBT key, data;
+	VRFY_TIMESTAMP_INFO *copy;
+	int ret;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	key.data = &lsn;
+	key.size = sizeof(DB_LSN);
+
+	BDBOP3(lvinfo->dbenv, __db_get(lvinfo->lsntime, lvinfo->ip, NULL,
+	    &key, &data, 0), DB_NOTFOUND, "__get_timestamp_info");
+	if (ret == DB_NOTFOUND)
+		goto err;
+
+	ret = __os_malloc(lvinfo->dbenv->env,
+	    sizeof(VRFY_TIMESTAMP_INFO), &copy);
+	if (ret != 0)
+		goto err;
+	memcpy(copy, data.data, sizeof(VRFY_TIMESTAMP_INFO));
+	*tsinfopp = copy;
+
+err:
+	return (ret);
+}
+
+/*
+ * __get_latest_timestamp_info --
+ *	Get latest timestamp info before lsn: position a cursor on lsn in
+ *	the lsntime database with DB_SET, step back one record with DB_PREV,
+ *	and return a copy of that record through tsinfopp.  The copy comes
+ *	from __os_malloc and is not released here.  Failures other than
+ *	DB_NOTFOUND are also reported with __db_err.
+ *
+ * PUBLIC: int __get_latest_timestamp_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC:     DB_LSN, VRFY_TIMESTAMP_INFO **));
+ */
+int
+__get_latest_timestamp_info(lvinfo, lsn, tsinfopp)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	DB_LSN lsn;
+	VRFY_TIMESTAMP_INFO **tsinfopp;
+{
+	DBC *csr;
+	DBT key, data;
+	VRFY_TIMESTAMP_INFO *copy;
+	int ret, tret;
+
+	csr = NULL;
+	ret = tret = 0;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	key.data = &lsn;
+	key.size = sizeof(lsn);
+
+	BDBOP(__db_cursor(lvinfo->lsntime, lvinfo->ip, NULL, &csr, 0));
+
+	/* Land on lsn itself, then back up to its predecessor. */
+	BDBOP(__dbc_get(csr, &key, &data, DB_SET));
+	BDBOP(__dbc_get(csr, &key, &data, DB_PREV));
+
+	ret = __os_malloc(lvinfo->dbenv->env,
+	    sizeof(VRFY_TIMESTAMP_INFO), &copy);
+	if (ret != 0)
+		goto err;
+	memcpy(copy, data.data, sizeof(VRFY_TIMESTAMP_INFO));
+	*tsinfopp = copy;
+
+err:
+	if (ret != 0 && ret != DB_NOTFOUND)
+		__db_err(lvinfo->dbenv->env,
+		    ret, "__get_latest_timestamp_info");
+	/* A close failure is only reported if nothing failed earlier. */
+	if (csr != NULL) {
+		tret = __dbc_close(csr);
+		if (tret != 0 && ret == 0)
+			ret = tret;
+	}
+	return (ret);
+}
+
+/*
+ * __put_timestamp_info --
+ *	Store *tsinfo in the lsntime database, keyed by tsinfo->lsn.  The
+ *	whole VRFY_TIMESTAMP_INFO structure is written as the data item.
+ *
+ * PUBLIC: int __put_timestamp_info __P((const DB_LOG_VRFY_INFO *,
+ * PUBLIC:     const VRFY_TIMESTAMP_INFO *));
+ */
+int
+__put_timestamp_info(lvinfo, tsinfo)
+	const DB_LOG_VRFY_INFO *lvinfo;
+	const VRFY_TIMESTAMP_INFO *tsinfo;
+{
+	DBT lsn_key, ts_data;
+	int ret;
+
+	memset(&lsn_key, 0, sizeof(DBT));
+	lsn_key.data = (void *)&(tsinfo->lsn);
+	lsn_key.size = sizeof(DB_LSN);
+
+	memset(&ts_data, 0, sizeof(DBT));
+	ts_data.data = (void *)tsinfo;
+	ts_data.size = sizeof(VRFY_TIMESTAMP_INFO);
+
+	BDBOP2(lvinfo->dbenv, __db_put(lvinfo->lsntime, lvinfo->ip, NULL,
+	    &lsn_key, &ts_data, 0), "__put_timestamp_info");
+
+	return (0);
+}
+
+/*
+ * __lv_txnrgns_lsn_cmp --
+ *	Comparison function over two DBTs that each hold one
+ *	struct __lv_txnrange: orders them by their end LSN using LOG_COMPARE.
+ *	The records are copied out of the DBTs before being compared.
+ */
+static int
+__lv_txnrgns_lsn_cmp(db, d1, d2)
+	DB *db;
+	const DBT *d1, *d2;
+{
+	struct __lv_txnrange lhs, rhs;
+
+	DB_ASSERT(db->env, d1->size == sizeof(lhs));
+	DB_ASSERT(db->env, d2->size == sizeof(rhs));
+
+	memcpy(&lhs, d1->data, d1->size);
+	memcpy(&rhs, d2->data, d2->size);
+
+	return (LOG_COMPARE(&(lhs.end), &(rhs.end)));
+}
+
+/*
+ * __find_lsnrg_by_timerg --
+ * Find the lsn closed interval [beginlsn, endlsn] so that the
+ * corresponding timestamp interval fully contains interval [begin, end].
+ *
+ * Both bounds are looked up with one cursor on lvinfo->timelsn, using
+ * the time_t values as keys.
+ *
+ * *startlsn receives the lsn of the record just before the first key
+ * that is >= begin, or file = offset = 0 when there is no earlier record.
+ *
+ * *endlsn receives file = offset = (u_int32_t)-1 when no key is >= end.
+ * Otherwise the cursor is moved to the record just before the next
+ * distinct key (or to the very last record when there is no next key)
+ * and that record's lsn is returned.
+ *
+ * Returns 0 or an error; DB_NOTFOUND is turned into 0 at the err label,
+ * and a cursor close failure is returned only if nothing failed earlier.
+ *
+ * NOTE(review): if the first DB_SET_RANGE on begin finds nothing and
+ * BDBOP jumps to err, as it appears to, this returns 0 without writing
+ * either *startlsn or *endlsn -- confirm callers pre-initialize them.
+ *
+ * PUBLIC: int __find_lsnrg_by_timerg __P((DB_LOG_VRFY_INFO *,
+ * PUBLIC: time_t, time_t, DB_LSN *, DB_LSN *));
+ */
+int
+__find_lsnrg_by_timerg(lvinfo, begin, end, startlsn, endlsn)
+ DB_LOG_VRFY_INFO *lvinfo;
+ time_t begin, end;
+ DB_LSN *startlsn, *endlsn;
+{
+ int ret, tret;
+ DBC *csr;
+ struct __lv_timestamp_info *t1, *t2;
+ DBT key, data;
+
+ ret = tret = 0;
+ csr = NULL;
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+
+ BDBOP(__db_cursor(lvinfo->timelsn, lvinfo->ip, NULL, &csr, 0));
+
+ /*
+ * We want a lsn range that completely contains [begin, end], so
+ * try move 1 record prev when getting the startlsn.
+ */
+ key.data = &begin;
+ key.size = sizeof(begin);
+ BDBOP(__dbc_get(csr, &key, &data, DB_SET_RANGE));
+ if ((ret = __dbc_get(csr, &key, &data, DB_PREV)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+ if (ret == DB_NOTFOUND)/* begin is smaller than the smallest key. */
+ startlsn->file = startlsn->offset = 0;/* beginning. */
+ else {
+ t1 = (struct __lv_timestamp_info *)data.data;
+ *startlsn = t1->lsn;
+ }
+
+ /*
+ * Move to the last key/data pair of the duplicate set to get the
+ * biggest lsn having end as timestamp.
+ */
+ key.data = &end;
+ key.size = sizeof(end);
+ if ((ret = __dbc_get(csr, &key, &data, DB_SET_RANGE)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+ if (ret == DB_NOTFOUND) {
+ endlsn->file = endlsn->offset = (u_int32_t)-1;/* Biggest lsn. */
+ ret = 0;
+ goto err; /* We are done. */
+ }
+
+ /*
+ * Go to the biggest lsn of the dup set, if the key is the last one,
+ * go to the last one.
+ */
+ if ((ret = __dbc_get(csr, &key, &data, DB_NEXT_NODUP)) != 0 &&
+ ret != DB_NOTFOUND)
+ goto err;
+
+ if (ret == DB_NOTFOUND)
+ BDBOP(__dbc_get(csr, &key, &data, DB_LAST));
+ else
+ BDBOP(__dbc_get(csr, &key, &data, DB_PREV));
+
+ t2 = (struct __lv_timestamp_info *)data.data;
+ *endlsn = t2->lsn;
+err:
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0)
+ ret = tret;
+ return (ret);
+}
+
+/*
+ * __add_txnrange --
+ *	Record one end of a transaction's lsn range in the txnrngs database,
+ *	keyed by txnid.  With ishead zero a new range record is inserted
+ *	whose end is lsn and whose when_commit is when.  With ishead non-zero
+ *	the first record found for txnid has its begin set to lsn and is
+ *	written back in place.
+ *
+ * PUBLIC: int __add_txnrange __P((DB_LOG_VRFY_INFO *, u_int32_t,
+ * PUBLIC:     DB_LSN, int32_t, int));
+ */
+int
+__add_txnrange(lvinfo, txnid, lsn, when, ishead)
+	DB_LOG_VRFY_INFO *lvinfo;
+	u_int32_t txnid;
+	DB_LSN lsn;
+	int32_t when;
+	int ishead;	/* Whether it's the 1st log of the txn. */
+{
+	DBC *csr;
+	DBT key, data;
+	struct __lv_txnrange newrange, *current;
+	int ret, tret;
+
+	csr = NULL;
+	current = NULL;
+	ret = 0;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	memset(&newrange, 0, sizeof(newrange));
+	key.data = &txnid;
+	key.size = sizeof(txnid);
+
+	BDBOP(__db_cursor(lvinfo->txnrngs, lvinfo->ip, NULL, &csr, 0));
+
+	/*
+	 * Note that we will backward play the logs to gather such information.
+	 */
+	if (ishead) {
+		/*
+		 * Dup data sorted by lsn, and we are backward playing logs,
+		 * so the 1st record should be the one we want.
+		 */
+		BDBOP(__dbc_get(csr, &key, &data, DB_SET));
+		current = (struct __lv_txnrange *)data.data;
+		DB_ASSERT(lvinfo->dbenv->env, IS_ZERO_LSN(current->begin));
+		current->begin = lsn;
+		BDBOP(__dbc_put(csr, &key, &data, DB_CURRENT));
+	} else {
+		newrange.txnid = txnid;
+		newrange.end = lsn;
+		newrange.when_commit = when;
+		data.data = &newrange;
+		data.size = sizeof(newrange);
+		BDBOP(__dbc_put(csr, &key, &data, DB_KEYFIRST));
+	}
+
+err:
+	/* A close failure is only reported if nothing failed earlier. */
+	if (csr != NULL) {
+		tret = __dbc_close(csr);
+		if (tret != 0 && ret == 0)
+			ret = tret;
+	}
+	return (ret);
+}
+
+/*
+ * __get_aborttxn --
+ *	If lsn is the last log of an aborted txn T, T's txnid is
+ *	returned via the log verify handle: lvinfo->aborted_txnid and
+ *	lvinfo->aborted_txnlsn are set from the txnaborts record keyed by
+ *	lsn.  Finding no such record is not an error; the handle is then
+ *	left unchanged and 0 is returned.
+ *
+ * PUBLIC: int __get_aborttxn __P((DB_LOG_VRFY_INFO *, DB_LSN));
+ */
+int
+__get_aborttxn(lvinfo, lsn)
+	DB_LOG_VRFY_INFO *lvinfo;
+	DB_LSN lsn;
+{
+	DBC *csr;
+	DBT key, data;
+	u_int32_t aborted;
+	int ret, tret;
+
+	csr = NULL;
+	aborted = 0;
+	ret = tret = 0;
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	key.data = &lsn;
+	key.size = sizeof(lsn);
+
+	BDBOP(__db_cursor(lvinfo->txnaborts, lvinfo->ip, NULL, &csr, 0));
+	BDBOP(__dbc_get(csr, &key, &data, DB_SET));
+	memcpy(&aborted, data.data, data.size);
+
+	/*
+	 * The lsn is the last op of an aborted txn, call __on_txnabort
+	 * before processing next log record.
+	 */
+	lvinfo->aborted_txnid = aborted;
+	lvinfo->aborted_txnlsn = lsn;
+
+err:
+	/* It's OK if can't find it. */
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+	if (csr != NULL) {
+		tret = __dbc_close(csr);
+		if (tret != 0 && ret == 0)
+			ret = tret;
+	}
+	return (ret);
+}
+
+/*
+ * __txn_started --
+ *	Whether txnid is started before lsn and ended after lsn.  *res is
+ *	set to 1 when some range recorded for txnid in the txnrngs database
+ *	satisfies begin < lsn <= end, and to 0 otherwise.  Having no record
+ *	for txnid is not an error.
+ *
+ * PUBLIC: int __txn_started __P((DB_LOG_VRFY_INFO *,
+ * PUBLIC:     DB_LSN, u_int32_t, int *));
+ */
+int
+__txn_started(lvinfo, lsn, txnid, res)
+	DB_LOG_VRFY_INFO *lvinfo;
+	DB_LSN lsn;
+	u_int32_t txnid;
+	int *res;
+{
+	DBC *csr;
+	DBT key, data;
+	struct __lv_txnrange *range, tr;
+	int ret, tret;
+
+	ret = *res = 0;
+	csr = NULL;
+	memset(&tr, 0, sizeof(tr));
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	key.data = &txnid;
+	key.size = sizeof(txnid);
+
+	BDBOP(__db_cursor(lvinfo->txnrngs, lvinfo->ip, NULL, &csr, 0));
+	BDBOP(__dbc_get(csr, &key, &data, DB_SET));
+
+	/* Positioned on the first duplicate; walk the rest of the set. */
+	do {
+		range = (struct __lv_txnrange *)data.data;
+		if (LOG_COMPARE(&lsn, &(range->begin)) > 0 &&
+		    LOG_COMPARE(&lsn, &(range->end)) <= 0) {
+			*res = 1;
+			break;
+		}
+	} while ((ret = __dbc_get(csr, &key, &data, DB_NEXT_DUP)) == 0);
+
+err:
+	if (ret == DB_NOTFOUND)
+		ret = 0;/* It's OK if can't find it. */
+	if (csr != NULL) {
+		tret = __dbc_close(csr);
+		if (tret != 0 && ret == 0)
+			ret = tret;
+	}
+	return (ret);
+}
+
+/*
+ * __set_logvrfy_dbfuid --
+ *	Look up the configured database file name
+ *	(lvinfo->lv_config->dbfile, including its terminating NUL) in the
+ *	fnameuid database and copy DB_FILE_ID_LEN bytes of the result into
+ *	lvinfo->target_dbid.
+ *
+ * PUBLIC: int __set_logvrfy_dbfuid __P((DB_LOG_VRFY_INFO *));
+ */
+int
+__set_logvrfy_dbfuid(lvinfo)
+	DB_LOG_VRFY_INFO *lvinfo;
+{
+	DBT key, data;
+	const char *fname;
+	size_t keylen;
+	int ret;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+
+	/* So far we only support verifying a specific db file. */
+	fname = lvinfo->lv_config->dbfile;
+	keylen = sizeof(char) * (strlen(fname) + 1);
+	key.data = (char *)fname;
+	key.size = (u_int32_t)keylen;
+
+	BDBOP2(lvinfo->dbenv, __db_get(lvinfo->fnameuid, lvinfo->ip, NULL,
+	    &key, &data, 0), "__set_logvrfy_dbfuid");
+
+	memcpy(lvinfo->target_dbid, data.data, DB_FILE_ID_LEN);
+
+	return (ret);
+}
+
+/*
+ * __add_page_to_txn --
+ * Try adding a page to a txn, result brings back if really added(0/1)
+ * or if there is an access violation(-1).
+ *
+ * A txnid below TXN_MINIMUM is ignored: *result is set to 0 and 0 is
+ * returned without any lookup.
+ *
+ * Otherwise dbregid is looked up in the dbregids database to obtain the
+ * file uid, and the pgtxn database is consulted with the key
+ * (file uid, pgno):
+ *   - no entry: txnid is stored as the page's owner, *result = 1;
+ *   - entry holds txnid: *result = 0;
+ *   - entry holds another txn: *result = -1 and *otxn is set to that
+ *     txn id.  *otxn is not written in any other case.
+ *
+ * If dbregid is not in the dbregids database and DB_LOG_VERIFY_PARTIAL is
+ * set on lvinfo, 0 is returned and *result is NOT written; callers should
+ * initialize it.  Without that flag DB_LOG_VERIFY_INTERR is set on lvinfo
+ * and DB_NOTFOUND is returned.
+ *
+ * PUBLIC: int __add_page_to_txn __P((DB_LOG_VRFY_INFO *,
+ * PUBLIC: int32_t, db_pgno_t, u_int32_t, u_int32_t *, int *));
+ */
+int
+__add_page_to_txn (lvinfo, dbregid, pgno, txnid, otxn, result)
+ DB_LOG_VRFY_INFO *lvinfo;
+ int32_t dbregid;
+ db_pgno_t pgno;
+ u_int32_t txnid, *otxn;
+ int *result;
+{
+ int ret;
+ u_int8_t *buf;
+ DBT key, data;
+ size_t buflen;
+ u_int32_t txnid2;
+ VRFY_FILELIFE *pff;
+
+ if (txnid < TXN_MINIMUM) {
+ *result = 0;
+ return (0);
+ }
+ buf = NULL;
+ ret = 0;
+ txnid2 = 0;
+ pff = NULL;
+ buflen = sizeof(u_int8_t) * DB_FILE_ID_LEN + sizeof(db_pgno_t);
+ BDBOP(__os_malloc(lvinfo->dbenv->env, buflen, &buf));
+ memset(buf, 0, buflen);
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+
+ /*
+ * We use the file uid as key because a single db file can have
+ * multiple dbregid at the same time, and we may neglect the fact
+ * that the same db file is being updated by multiple txns if we use
+ * dbregid as key.
+ */
+ key.data = &dbregid;
+ key.size = sizeof(dbregid);
+ if ((ret = __db_get(lvinfo->dbregids, lvinfo->ip, NULL,
+ &key, &data, 0)) != 0) {
+ if (ret == DB_NOTFOUND) {
+ if (F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) {
+ ret = 0;
+ goto out;
+ } else
+ F_SET(lvinfo, DB_LOG_VERIFY_INTERR);
+ }
+ goto err;
+ }
+ pff = (VRFY_FILELIFE *)data.data;
+ memcpy(buf, pff->fileid, DB_FILE_ID_LEN);
+ memcpy(buf + DB_FILE_ID_LEN, (u_int8_t *)&pgno, sizeof(pgno));
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ key.data = buf;
+ key.size = (u_int32_t)buflen;
+ if ((ret = __db_get(lvinfo->pgtxn, lvinfo->ip, NULL,
+ &key, &data, 0)) != 0) {
+ if (ret == DB_NOTFOUND) {
+ data.data = &txnid;
+ data.size = sizeof(txnid);
+ BDBOP(__db_put(lvinfo->pgtxn, lvinfo->ip, NULL, &key,
+ &data, 0));
+ *result = 1;
+ ret = 0;/* This is not an error. */
+ }
+ goto err;
+ }
+ DB_ASSERT(lvinfo->dbenv->env, data.size == sizeof(txnid2));
+ memcpy(&txnid2, data.data, data.size);
+ if (txnid == txnid2)/* The same txn already has the page. */
+ *result = 0;
+ else {/* Txn txnid is updating pages still held by txnid2. */
+ *result = -1;
+ *otxn = txnid2;
+ }
+out:
+ /* result is set to -1 on violation, 0 if already has it, 1 if added. */
+err:
+ if (buf != NULL)
+ __os_free(lvinfo->dbenv->env, buf);
+ return (ret);
+}
+
+/*
+ * __del_txn_pages --
+ *	Delete the entries keyed by txnid from the txnpg database and return
+ *	the result of that delete.
+ *
+ * PUBLIC: int __del_txn_pages __P((DB_LOG_VRFY_INFO *, u_int32_t));
+ */
+int
+__del_txn_pages(lvinfo, txnid)
+	DB_LOG_VRFY_INFO *lvinfo;
+	u_int32_t txnid;
+{
+	DBT txn_key;
+	int ret;
+
+	memset(&txn_key, 0, sizeof(DBT));
+	txn_key.data = &txnid;
+	txn_key.size = sizeof(txnid);
+	ret = 0;
+
+	BDBOP(__db_del(lvinfo->txnpg, lvinfo->ip, NULL, &txn_key, 0));
+
+err:
+	return (ret);
+}
+
+/*
+ * __is_ancestor_txn --
+ * Tells via res if ptxnid is an ancestor (parent, grandparent, ...) of
+ * txnid at the moment of lsn.
+ *
+ * Starting from txnid, each step looks the current id up in the txnrngs
+ * database, scans its duplicate range records for one that has a
+ * non-zero ptxnid and contains lsn in [begin, end], and compares that
+ * record's ptxnid with ptxnid.  On a match *res is set to 1; otherwise
+ * the walk continues from the record's ptxnid until it reaches 0.
+ * *res is 0 when no match is found.  DB_NOTFOUND is turned into 0.
+ *
+ * NOTE(review): when the duplicate scan ends without a matching range,
+ * tr still holds the last record read and is what gets compared and
+ * followed -- confirm this is intended.
+ *
+ * PUBLIC: int __is_ancestor_txn __P((DB_LOG_VRFY_INFO *,
+ * PUBLIC: u_int32_t, u_int32_t, DB_LSN, int *));
+ */
+int
+__is_ancestor_txn (lvinfo, ptxnid, txnid, lsn, res)
+ DB_LOG_VRFY_INFO *lvinfo;
+ u_int32_t ptxnid, txnid;
+ DB_LSN lsn;
+ int *res;
+{
+ u_int32_t ptid;
+ int ret, tret;
+ DBC *csr;
+ DB *pdb;
+ DBT key, data;
+ struct __lv_txnrange tr;
+
+ ret = 0;
+ ptid = txnid;
+ csr = NULL;
+ pdb = lvinfo->txnrngs;
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ *res = 0;
+ BDBOP(__db_cursor(pdb, lvinfo->ip, NULL, &csr, 0));
+
+ /* See if ptxnid is an ancestor of txnid. */
+ do {
+ key.data = &ptid;
+ key.size = sizeof(ptid);
+ BDBOP(__dbc_get(csr, &key, &data, DB_SET));
+ /* A txnid maybe reused, we want the range having lsn in it. */
+ for (;ret == 0;
+ ret = __dbc_get(csr, &key, &data, DB_NEXT_DUP)) {
+ DB_ASSERT(pdb->env, sizeof(tr) == data.size);
+ memcpy(&tr, data.data, data.size);
+ if (tr.ptxnid > 0 &&
+ LOG_COMPARE(&lsn, &(tr.begin)) >= 0 &&
+ LOG_COMPARE(&lsn, &(tr.end)) <= 0)
+ break;
+ }
+
+ if (tr.ptxnid == ptxnid) {
+ *res = 1;
+ goto out;
+ } else
+ ptid = tr.ptxnid;
+
+ } while (ptid != 0);
+out:
+
+err:
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0)
+ ret = tret;
+ return (ret);
+}
+
+/*
+ * __return_txn_pages --
+ *	Hand every page currently recorded for txn ctxn over to txn ptxn.
+ *	The txnpg database is walked with a pget cursor positioned on ctxn;
+ *	for each duplicate the primary key returned (a file uid followed by
+ *	a page number) is rewritten in the pgtxn database with ptxn as its
+ *	data.  Afterwards ctxn's entries are deleted from txnpg with
+ *	__del_txn_pages.
+ *
+ *	Returns 0 on success.  A cursor failure other than running out of
+ *	duplicates (DB_NOTFOUND) stops the operation and is returned, rather
+ *	than being overwritten by the result of the delete.  DB_NOTFOUND from
+ *	the final delete is passed back to the caller unchanged.
+ *
+ * PUBLIC: int __return_txn_pages __P((DB_LOG_VRFY_INFO *,
+ * PUBLIC:     u_int32_t, u_int32_t));
+ */
+int
+__return_txn_pages(lvh, ctxn, ptxn)
+	DB_LOG_VRFY_INFO *lvh;
+	u_int32_t ctxn, ptxn;
+{
+	int ret, tret;
+	DBC *csr;
+	DB *pdb, *sdb;
+	DBT key, key2, data, data2;
+	char buf[DB_FILE_ID_LEN + sizeof(db_pgno_t)];
+
+	ret = tret = 0;
+	csr = NULL;
+	sdb = lvh->txnpg;
+	pdb = lvh->pgtxn;
+	memset(&key, 0, sizeof(DBT));
+	memset(&key2, 0, sizeof(DBT));
+	memset(&data, 0, sizeof(DBT));
+	memset(&data2, 0, sizeof(DBT));
+
+	BDBOP(__db_cursor(sdb, lvh->ip, NULL, &csr, 0));
+	key.data = &ctxn;
+	key.size = sizeof(ctxn);
+	key2.data = &ptxn;
+	key2.size = sizeof(ptxn);
+	/* The primary key is returned into our own fixed-size buffer. */
+	data2.data = buf;
+	data2.ulen = DB_FILE_ID_LEN + sizeof(db_pgno_t);
+	data2.flags = DB_DBT_USERMEM;
+
+	for (ret = __dbc_pget(csr, &key, &data2, &data, DB_SET); ret == 0;
+	    ret = __dbc_pget(csr, &key, &data2, &data, DB_NEXT_DUP))
+		BDBOP(__db_put(pdb, lvh->ip, NULL, &data2, &key2, 0));
+
+	/*
+	 * The walk ends normally only when the cursor runs out of
+	 * duplicates.  Anything else is a real failure and must not be
+	 * masked by the delete below.
+	 */
+	if (ret != DB_NOTFOUND)
+		goto err;
+
+	if ((ret = __del_txn_pages(lvh, ctxn)) != 0 && ret != DB_NOTFOUND)
+		goto err;
+err:
+	if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0)
+		ret = tret;
+	return (ret);
+}
+
+/*
+ * ADD_ITEM --
+ * Store the logtype argument's spelling, exactly as written at the call
+ * site, in (lvh)->logtype_names at index logtype.  The # operator
+ * stringizes the argument before macro expansion, so the stored string
+ * is the symbolic name while the index is the constant's value.
+ */
+#define ADD_ITEM(lvh, logtype) ((lvh)->logtype_names[(logtype)] = (#logtype))
+/*
+ * __lv_setup_logtype_names --
+ * Fill lvinfo->logtype_names with the symbolic name of every log record
+ * type listed below, indexed by the record type constant.  The hash and
+ * queue record types are only entered when HAVE_HASH and HAVE_QUEUE,
+ * respectively, are defined.
+ */
+static void
+__lv_setup_logtype_names(lvinfo)
+ DB_LOG_VRFY_INFO *lvinfo;
+{
+ ADD_ITEM(lvinfo, DB___bam_irep);
+ ADD_ITEM(lvinfo, DB___bam_split_42);
+ ADD_ITEM(lvinfo, DB___bam_split);
+ ADD_ITEM(lvinfo, DB___bam_rsplit);
+ ADD_ITEM(lvinfo, DB___bam_adj);
+ ADD_ITEM(lvinfo, DB___bam_cadjust);
+ ADD_ITEM(lvinfo, DB___bam_cdel);
+ ADD_ITEM(lvinfo, DB___bam_repl);
+ ADD_ITEM(lvinfo, DB___bam_root);
+ ADD_ITEM(lvinfo, DB___bam_curadj);
+ ADD_ITEM(lvinfo, DB___bam_rcuradj);
+ ADD_ITEM(lvinfo, DB___bam_relink_43);
+ ADD_ITEM(lvinfo, DB___bam_merge_44);
+ ADD_ITEM(lvinfo, DB___crdel_metasub);
+ ADD_ITEM(lvinfo, DB___crdel_inmem_create);
+ ADD_ITEM(lvinfo, DB___crdel_inmem_rename);
+ ADD_ITEM(lvinfo, DB___crdel_inmem_remove);
+ ADD_ITEM(lvinfo, DB___dbreg_register);
+ ADD_ITEM(lvinfo, DB___db_addrem);
+ ADD_ITEM(lvinfo, DB___db_big);
+ ADD_ITEM(lvinfo, DB___db_ovref);
+ ADD_ITEM(lvinfo, DB___db_relink_42);
+ ADD_ITEM(lvinfo, DB___db_debug);
+ ADD_ITEM(lvinfo, DB___db_noop);
+ ADD_ITEM(lvinfo, DB___db_pg_alloc_42);
+ ADD_ITEM(lvinfo, DB___db_pg_alloc);
+ ADD_ITEM(lvinfo, DB___db_pg_free_42);
+ ADD_ITEM(lvinfo, DB___db_pg_free);
+ ADD_ITEM(lvinfo, DB___db_cksum);
+ ADD_ITEM(lvinfo, DB___db_pg_freedata_42);
+ ADD_ITEM(lvinfo, DB___db_pg_freedata);
+ ADD_ITEM(lvinfo, DB___db_pg_init);
+ ADD_ITEM(lvinfo, DB___db_pg_sort_44);
+ ADD_ITEM(lvinfo, DB___db_pg_trunc);
+ ADD_ITEM(lvinfo, DB___db_realloc);
+ ADD_ITEM(lvinfo, DB___db_relink);
+ ADD_ITEM(lvinfo, DB___db_merge);
+ ADD_ITEM(lvinfo, DB___db_pgno);
+#ifdef HAVE_HASH
+ ADD_ITEM(lvinfo, DB___ham_insdel);
+ ADD_ITEM(lvinfo, DB___ham_newpage);
+ ADD_ITEM(lvinfo, DB___ham_splitdata);
+ ADD_ITEM(lvinfo, DB___ham_replace);
+ ADD_ITEM(lvinfo, DB___ham_copypage);
+ ADD_ITEM(lvinfo, DB___ham_metagroup_42);
+ ADD_ITEM(lvinfo, DB___ham_metagroup);
+ ADD_ITEM(lvinfo, DB___ham_groupalloc_42);
+ ADD_ITEM(lvinfo, DB___ham_groupalloc);
+ ADD_ITEM(lvinfo, DB___ham_changeslot);
+ ADD_ITEM(lvinfo, DB___ham_contract);
+ ADD_ITEM(lvinfo, DB___ham_curadj);
+ ADD_ITEM(lvinfo, DB___ham_chgpg);
+#endif
+#ifdef HAVE_QUEUE
+ ADD_ITEM(lvinfo, DB___qam_incfirst);
+ ADD_ITEM(lvinfo, DB___qam_mvptr);
+ ADD_ITEM(lvinfo, DB___qam_del);
+ ADD_ITEM(lvinfo, DB___qam_add);
+ ADD_ITEM(lvinfo, DB___qam_delext);
+#endif
+ ADD_ITEM(lvinfo, DB___txn_regop_42);
+ ADD_ITEM(lvinfo, DB___txn_regop);
+ ADD_ITEM(lvinfo, DB___txn_ckp_42);
+ ADD_ITEM(lvinfo, DB___txn_ckp);
+ ADD_ITEM(lvinfo, DB___txn_child);
+ ADD_ITEM(lvinfo, DB___txn_xa_regop_42);
+ ADD_ITEM(lvinfo, DB___txn_prepare);
+ ADD_ITEM(lvinfo, DB___txn_recycle);
+ ADD_ITEM(lvinfo, DB___fop_create_42);
+ ADD_ITEM(lvinfo, DB___fop_create);
+ ADD_ITEM(lvinfo, DB___fop_remove);
+ ADD_ITEM(lvinfo, DB___fop_write_42);
+ ADD_ITEM(lvinfo, DB___fop_write);
+ ADD_ITEM(lvinfo, DB___fop_rename_42);
+ ADD_ITEM(lvinfo, DB___fop_rename_noundo_46);
+ ADD_ITEM(lvinfo, DB___fop_rename);
+ ADD_ITEM(lvinfo, DB___fop_rename_noundo);
+ ADD_ITEM(lvinfo, DB___fop_file_remove);
+}