diff options
author | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-07-20 20:00:05 +0100 |
---|---|---|
committer | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-07-20 20:00:05 +0100 |
commit | 3ef782d3745ea8f25a3151561a3cfb882190210e (patch) | |
tree | 86b9c2f5fde051dd0bced99b3fc9f5a3ba08db69 /src/log | |
download | berkeleydb-3ef782d3745ea8f25a3151561a3cfb882190210e.tar.gz |
Tarball conversion
Diffstat (limited to 'src/log')
-rw-r--r-- | src/log/log.c | 1727 | ||||
-rw-r--r-- | src/log/log_archive.c | 643 | ||||
-rw-r--r-- | src/log/log_compare.c | 66 | ||||
-rw-r--r-- | src/log/log_debug.c | 146 | ||||
-rw-r--r-- | src/log/log_get.c | 1626 | ||||
-rw-r--r-- | src/log/log_method.c | 533 | ||||
-rw-r--r-- | src/log/log_print.c | 380 | ||||
-rw-r--r-- | src/log/log_put.c | 2041 | ||||
-rw-r--r-- | src/log/log_stat.c | 336 | ||||
-rw-r--r-- | src/log/log_verify.c | 437 | ||||
-rw-r--r-- | src/log/log_verify_auto.c | 318 | ||||
-rw-r--r-- | src/log/log_verify_int.c | 4353 | ||||
-rw-r--r-- | src/log/log_verify_stub.c | 79 | ||||
-rw-r--r-- | src/log/log_verify_util.c | 2234 |
14 files changed, 14919 insertions, 0 deletions
diff --git a/src/log/log.c b/src/log/log.c new file mode 100644 index 00000000..5808145f --- /dev/null +++ b/src/log/log.c @@ -0,0 +1,1727 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +static int __log_init __P((ENV *, DB_LOG *)); +static int __log_recover __P((DB_LOG *)); + +/* + * __log_open -- + * Internal version of log_open: only called from ENV->open. + * + * PUBLIC: int __log_open __P((ENV *)); + */ +int +__log_open(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_LOG *dblp; + LOG *lp; + u_int8_t *bulk; + int region_locked, ret; + + dbenv = env->dbenv; + region_locked = 0; + + /* Create/initialize the DB_LOG structure. */ + if ((ret = __os_calloc(env, 1, sizeof(DB_LOG), &dblp)) != 0) + return (ret); + dblp->env = env; + + /* Join/create the log region. */ + if ((ret = __env_region_share(env, &dblp->reginfo)) != 0) + goto err; + + /* If we created the region, initialize it. */ + if (F_ISSET(&dblp->reginfo, REGION_CREATE)) + if ((ret = __log_init(env, dblp)) != 0) + goto err; + + /* Set the local addresses. */ + lp = dblp->reginfo.primary = R_ADDR(&dblp->reginfo, + ((REGENV *)env->reginfo->primary)->lg_primary); + dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off); + + /* + * If the region is threaded, we have to lock the DBREG list, and we + * need to allocate a mutex for that purpose. + */ + if ((ret = __mutex_alloc(env, + MTX_LOG_REGION, DB_MUTEX_PROCESS_ONLY, &dblp->mtx_dbreg)) != 0) + goto err; + + /* + * Set the handle -- we may be about to run recovery, which allocates + * log cursors. Log cursors require logging be already configured, + * and the handle being set is what demonstrates that. + * + * If we created the region, run recovery. 
If that fails, make sure + * we reset the log handle before cleaning up, otherwise we will try + * and clean up again in the mainline ENV initialization code. + */ + env->lg_handle = dblp; + + if (F_ISSET(&dblp->reginfo, REGION_CREATE)) { + /* + * We first take the log file size from the environment, if + * specified. If that wasn't set, default it. Regardless, + * recovery may set it from the persistent information in a + * log file header. + */ + if (lp->log_size == 0) + lp->log_size = + FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ? + LG_MAX_INMEM : LG_MAX_DEFAULT; + + if ((ret = __log_recover(dblp)) != 0) + goto err; + + /* + * If the next log file size hasn't been set yet, default it + * to the current log file size. + */ + if (lp->log_nsize == 0) + lp->log_nsize = lp->log_size; + + /* + * If we haven't written any log files, write the first one + * so that checkpoint gets a valid ckp_lsn value. + */ + if (IS_INIT_LSN(lp->lsn) && + (ret = __log_newfile(dblp, NULL, 0, 0)) != 0) + goto err; + + /* + * Initialize replication's next-expected LSN value + * and replication's bulk buffer. In __env_open, we + * always create/open the replication region before + * the log region so we're assured that our rep_handle + * is valid at this point, if replication is being used. + */ + lp->ready_lsn = lp->lsn; + if (IS_ENV_REPLICATED(env)) { + if ((ret = + __env_alloc(&dblp->reginfo, MEGABYTE, &bulk)) != 0) + goto err; + lp->bulk_buf = R_OFFSET(&dblp->reginfo, bulk); + lp->bulk_len = MEGABYTE; + lp->bulk_off = 0; + lp->wait_ts = env->rep_handle->request_gap; + __os_gettime(env, &lp->rcvd_ts, 1); + } else { + lp->bulk_buf = INVALID_ROFF; + lp->bulk_len = 0; + lp->bulk_off = 0; + } + } else { + /* + * A process joining the region may have reset the log file + * size, too. If so, it only affects the next log file we + * create. We need to check that the size is reasonable given + * the buffer size in the region. 
+ */ + LOG_SYSTEM_LOCK(env); + region_locked = 1; + + if (dbenv->lg_size != 0) { + if ((ret = + __log_check_sizes(env, dbenv->lg_size, 0)) != 0) + goto err; + + lp->log_nsize = dbenv->lg_size; + } + + LOG_SYSTEM_UNLOCK(env); + region_locked = 0; + + if (dbenv->lg_flags != 0 && (ret = + __log_set_config_int(dbenv, dbenv->lg_flags, 1, 0)) != 0) + return (ret); + } + dblp->reginfo.mtx_alloc = lp->mtx_region; + + return (0); + +err: if (dblp->reginfo.addr != NULL) { + if (region_locked) + LOG_SYSTEM_UNLOCK(env); + (void)__env_region_detach(env, &dblp->reginfo, 0); + } + env->lg_handle = NULL; + + (void)__mutex_free(env, &dblp->mtx_dbreg); + __os_free(env, dblp); + + return (ret); +} + +/* + * __log_init -- + * Initialize a log region in shared memory. + */ +static int +__log_init(env, dblp) + ENV *env; + DB_LOG *dblp; +{ + DB_ENV *dbenv; + LOG *lp; + int ret; + void *p; + + dbenv = env->dbenv; + + /* + * This is the first point where we can validate the buffer size, + * because we know all three settings have been configured (file size, + * buffer size and the in-memory flag). + */ + if ((ret = + __log_check_sizes(env, dbenv->lg_size, dbenv->lg_bsize)) != 0) + return (ret); + + if ((ret = __env_alloc(&dblp->reginfo, + sizeof(*lp), &dblp->reginfo.primary)) != 0) + goto mem_err; + + ((REGENV *)env->reginfo->primary)->lg_primary = + R_OFFSET(&dblp->reginfo, dblp->reginfo.primary); + + lp = dblp->reginfo.primary; + memset(lp, 0, sizeof(*lp)); + + /* We share the region so we need the same mutex. */ + lp->mtx_region = ((REGENV *)env->reginfo->primary)->mtx_regenv; + + lp->fid_max = 0; + SH_TAILQ_INIT(&lp->fq); + lp->free_fid_stack = INVALID_ROFF; + lp->free_fids = lp->free_fids_alloced = 0; + + /* Initialize LOG LSNs. */ + INIT_LSN(lp->lsn); + INIT_LSN(lp->t_lsn); + + /* + * It's possible to be waiting for an LSN of [1][0], if a replication + * client gets the first log record out of order. An LSN of [0][0] + * signifies that we're not waiting. 
+ */ + ZERO_LSN(lp->waiting_lsn); + + /* + * Log makes note of the fact that it ran into a checkpoint on + * startup if it did so, as a recovery optimization. A zero + * LSN signifies that it hasn't found one [yet]. + */ + ZERO_LSN(lp->cached_ckp_lsn); + + if ((ret = + __mutex_alloc(env, MTX_LOG_FILENAME, 0, &lp->mtx_filelist)) != 0) + return (ret); + if ((ret = __mutex_alloc(env, MTX_LOG_FLUSH, 0, &lp->mtx_flush)) != 0) + return (ret); + + /* Initialize the buffer. */ + if ((ret = __env_alloc(&dblp->reginfo, dbenv->lg_bsize, &p)) != 0) { +mem_err: __db_errx( env, DB_STR("2524", + "unable to allocate log region memory")); + return (ret); + } + lp->regionmax = dbenv->lg_regionmax; + lp->buffer_off = R_OFFSET(&dblp->reginfo, p); + lp->buffer_size = dbenv->lg_bsize; + lp->filemode = dbenv->lg_filemode; + lp->log_size = lp->log_nsize = dbenv->lg_size; + lp->stat.st_fileid_init = dbenv->lg_fileid_init; + + /* Initialize the commit Queue. */ + SH_TAILQ_INIT(&lp->free_commits); + SH_TAILQ_INIT(&lp->commits); + lp->ncommit = 0; + + /* Initialize the logfiles list for in-memory logs. */ + SH_TAILQ_INIT(&lp->logfiles); + SH_TAILQ_INIT(&lp->free_logfiles); + + /* + * Fill in the log's persistent header. Don't fill in the log file + * sizes, as they may change at any time and so have to be filled in + * as each log file is created. + */ + lp->persist.magic = DB_LOGMAGIC; + /* + * Don't use __log_set_version because env->dblp isn't set up yet. + */ + lp->persist.version = DB_LOGVERSION; + lp->persist.notused = 0; + env->lg_handle = dblp; + + /* Migrate persistent flags from the ENV into the region. */ + if (dbenv->lg_flags != 0 && + (ret = __log_set_config_int(dbenv, dbenv->lg_flags, 1, 1)) != 0) + return (ret); + + (void)time(&lp->timestamp); + return (0); +} + +/* + * __log_recover -- + * Recover a log. 
+ */ +static int +__log_recover(dblp) + DB_LOG *dblp; +{ + DBT dbt; + DB_ENV *dbenv; + DB_LOGC *logc; + DB_LSN lsn; + ENV *env; + LOG *lp; + u_int32_t cnt, rectype; + int ret; + logfile_validity status; + + env = dblp->env; + dbenv = env->dbenv; + logc = NULL; + lp = dblp->reginfo.primary; + + /* + * Find a log file. If none exist, we simply return, leaving + * everything initialized to a new log. + */ + if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0) + return (ret); + if (cnt == 0) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, DB_STR("2525", "No log files found")); + return (0); + } + + /* + * If the last file is an old, unreadable version, start a new + * file. Don't bother finding the end of the last log file; + * we assume that it's valid in its entirety, since the user + * should have shut down cleanly or run recovery before upgrading. + */ + if (status == DB_LV_OLD_UNREADABLE) { + lp->lsn.file = lp->s_lsn.file = cnt + 1; + lp->lsn.offset = lp->s_lsn.offset = 0; + goto skipsearch; + } + DB_ASSERT(env, + (status == DB_LV_NORMAL || status == DB_LV_OLD_READABLE)); + + /* + * We have the last useful log file and we've loaded any persistent + * information. Set the end point of the log past the end of the last + * file. Read the last file, looking for the last checkpoint and + * the log's end. + */ + lp->lsn.file = cnt + 1; + lp->lsn.offset = 0; + lsn.file = cnt; + lsn.offset = 0; + + /* + * Allocate a cursor and set it to the first record. This shouldn't + * fail, leave error messages on. + */ + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + F_SET(logc, DB_LOG_LOCKED); + memset(&dbt, 0, sizeof(dbt)); + if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0) + goto err; + + /* + * Read to the end of the file. This may fail at some point, so + * turn off error messages. 
+ */ + F_SET(logc, DB_LOG_SILENT_ERR); + while (__logc_get(logc, &lsn, &dbt, DB_NEXT) == 0) { + if (dbt.size < sizeof(u_int32_t)) + continue; + LOGCOPY_32(env, &rectype, dbt.data); + if (rectype == DB___txn_ckp) + /* + * If we happen to run into a checkpoint, cache its + * LSN so that the transaction system doesn't have + * to walk this log file again looking for it. + */ + lp->cached_ckp_lsn = lsn; + } + F_CLR(logc, DB_LOG_SILENT_ERR); + + /* + * We now know where the end of the log is. Set the first LSN that + * we want to return to an application and the LSN of the last known + * record on disk. + */ + lp->lsn = lsn; + lp->s_lsn = lsn; + lp->lsn.offset += logc->len; + lp->s_lsn.offset += logc->len; + + /* Set up the current buffer information, too. */ + lp->len = logc->len; + lp->a_off = 0; + lp->b_off = 0; + lp->w_off = lp->lsn.offset; + +skipsearch: + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, DB_STR_A("2526", + "Finding last valid log LSN: file: %lu offset %lu", + "%lu %lu"), (u_long)lp->lsn.file, (u_long)lp->lsn.offset); + +err: if (logc != NULL) + (void)__logc_close(logc); + + return (ret); +} + +/* + * __log_find -- + * Try to find a log file. If find_first is set, valp will contain + * the number of the first readable log file, else it will contain the number + * of the last log file (which may be too old to read). + * + * PUBLIC: int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *)); + */ +int +__log_find(dblp, find_first, valp, statusp) + DB_LOG *dblp; + int find_first; + u_int32_t *valp; + logfile_validity *statusp; +{ + ENV *env; + LOG *lp; + logfile_validity logval_status, status; + struct __db_filestart *filestart; + u_int32_t clv, logval; + int cnt, fcnt, ret; + const char *dir; + char *c, **names, *p, *q; + + env = dblp->env; + lp = dblp->reginfo.primary; + logval_status = status = DB_LV_NONEXISTENT; + + /* Return a value of 0 as the log file number on failure. 
*/ + *valp = 0; + + if (lp->db_log_inmemory) { + filestart = find_first ? + SH_TAILQ_FIRST(&lp->logfiles, __db_filestart) : + SH_TAILQ_LAST(&lp->logfiles, links, __db_filestart); + if (filestart != NULL) { + *valp = filestart->file; + logval_status = DB_LV_NORMAL; + } + *statusp = logval_status; + return (0); + } + + /* Find the directory name. */ + if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0) { + __os_free(env, p); + return (ret); + } + if ((q = __db_rpath(p)) == NULL) + dir = PATH_DOT; + else { + *q = '\0'; + dir = p; + } + + /* Get the list of file names. */ +retry: if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) { + __db_err(env, ret, "%s", dir); + __os_free(env, p); + return (ret); + } + + /* Search for a valid log file name. */ + for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) { + if (!IS_LOG_FILE(names[cnt])) + continue; + + /* + * Names of the form log\.[0-9]* are reserved for DB. Other + * names sharing LFPREFIX, such as "log.db", are legal. + */ + for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++) + if (!isdigit((int)*c)) + break; + if (*c != '\0') + continue; + + /* + * Use atol, not atoi; if an "int" is 16-bits, the largest + * log file name won't fit. + */ + clv = (u_int32_t)atol(names[cnt] + (sizeof(LFPREFIX) - 1)); + + /* + * If searching for the first log file, we want to return the + * oldest log file we can read, or, if no readable log files + * exist, the newest log file we can't read (the crossover + * point between the old and new versions of the log file). + * + * If we're searching for the last log file, we want to return + * the newest log file, period. + * + * Readable log files should never precede unreadable log + * files, that would mean the admin seriously screwed up. 
+ */ + if (find_first) { + if (logval != 0 && + status != DB_LV_OLD_UNREADABLE && clv > logval) + continue; + } else + if (logval != 0 && clv < logval) + continue; + + if ((ret = __log_valid(dblp, clv, 1, NULL, 0, + &status, NULL)) != 0) { + /* + * If we have raced with removal of a log file since + * the call to __os_dirlist, it may no longer exist. + * In that case, just go on to the next one. If we're + * at the end of the list, all of the log files we saw + * initially are gone and we need to get the list again. + */ + if (ret == ENOENT) { + ret = 0; + if (cnt == 0) { + __os_dirfree(env, names, fcnt); + goto retry; + } + continue; + } + __db_err(env, ret, DB_STR_A("2527", + "Invalid log file: %s", "%s"), names[cnt]); + goto err; + } + switch (status) { + case DB_LV_NONEXISTENT: + /* __log_valid never returns DB_LV_NONEXISTENT. */ + DB_ASSERT(env, 0); + break; + case DB_LV_INCOMPLETE: + /* + * The last log file may not have been initialized -- + * it's possible to create a log file but not write + * anything to it. If performing recovery (that is, + * if find_first isn't set), ignore the file, it's + * not interesting. If we're searching for the first + * log record, return the file (assuming we don't find + * something better), as the "real" first log record + * is likely to be in the log buffer, and we want to + * set the file LSN for our return. + */ + if (find_first) + goto found; + break; + case DB_LV_OLD_UNREADABLE: + /* + * If we're searching for the first log file, then we + * only want this file if we don't yet have a file or + * already have an unreadable file and this one is + * newer than that one. If we're searching for the + * last log file, we always want this file because we + * wouldn't be here if it wasn't newer than our current + * choice. 
+ */ + if (!find_first || logval == 0 || + (status == DB_LV_OLD_UNREADABLE && clv > logval)) + goto found; + break; + case DB_LV_NORMAL: + case DB_LV_OLD_READABLE: +found: logval = clv; + logval_status = status; + break; + } + } + + *valp = logval; + +err: __os_dirfree(env, names, fcnt); + __os_free(env, p); + *statusp = logval_status; + + return (ret); +} + +/* + * __log_valid -- + * Validate a log file. Returns an error code in the event of + * a fatal flaw in the specified log file; returns success with + * a code indicating the currentness and completeness of the specified + * log file if it is not unexpectedly flawed (that is, if it's perfectly + * normal, if it's zero-length, or if it's an old version). + * + * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int, + * PUBLIC: DB_FH **, u_int32_t, logfile_validity *, u_int32_t *)); + */ +int +__log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp) + DB_LOG *dblp; + u_int32_t number; + int set_persist; + DB_FH **fhpp; + u_int32_t flags; + logfile_validity *statusp; + u_int32_t *versionp; +{ + DB_CIPHER *db_cipher; + DB_FH *fhp; + ENV *env; + HDR *hdr; + LOG *lp; + LOGP *persist; + logfile_validity status; + size_t hdrsize, nr, recsize; + int chksum_includes_hdr, is_hmac, ret; + u_int32_t logversion; + u_int8_t *tmp; + char *fname; + + env = dblp->env; + db_cipher = env->crypto_handle; + fhp = NULL; + persist = NULL; + status = DB_LV_NORMAL; + tmp = NULL; +#if defined(HAVE_LOG_CHECKSUM) + /* Most log versions include the hdr in the checksum. */ + chksum_includes_hdr = 1; +#else + COMPQUIET(chksum_includes_hdr, 0); +#endif + + /* Return the file handle to our caller, on request */ + if (fhpp != NULL) + *fhpp = NULL; + + if (flags == 0) + flags = DB_OSO_RDONLY | DB_OSO_SEQ; + /* Try to open the log file. 
*/ + if ((ret = __log_name(dblp, number, &fname, &fhp, flags)) != 0) { + __os_free(env, fname); + return (ret); + } + + hdrsize = HDR_NORMAL_SZ; + is_hmac = 0; + recsize = sizeof(LOGP); + if (CRYPTO_ON(env)) { + hdrsize = HDR_CRYPTO_SZ; + recsize = sizeof(LOGP); + recsize += db_cipher->adj_size(recsize); + is_hmac = 1; + } + if ((ret = __os_calloc(env, 1, recsize + hdrsize, &tmp)) != 0) + goto err; + + hdr = (HDR *)tmp; + persist = (LOGP *)(tmp + hdrsize); + + /* + * Try to read the header. This can fail if the log is truncated, or + * if we find a preallocated log file where the header has not yet been + * written, so we need to check whether the header is zero-filled. + */ + if ((ret = __os_read(env, fhp, tmp, recsize + hdrsize, &nr)) != 0 || + nr != recsize + hdrsize || + (hdr->len == 0 && persist->magic == 0 && persist->log_size == 0)) { + if (ret == 0) + status = DB_LV_INCOMPLETE; + else + /* + * The error was a fatal read error, not just an + * incompletely initialized log file. + */ + __db_err(env, ret, DB_STR_A("2528", + "ignoring log file: %s", "%s"), fname); + goto err; + } + + if (LOG_SWAPPED(env)) + __log_hdrswap(hdr, CRYPTO_ON(env)); + + /* + * Now we have to validate the persistent record. We have + * several scenarios we have to deal with: + * + * 1. User has crypto turned on: + * - They're reading an old, unencrypted log file + * . We will fail the record size match check below. + * - They're reading a current, unencrypted log file + * . We will fail the record size match check below. + * - They're reading an old, encrypted log file [NOT YET] + * . After decryption we'll fail the version check. [NOT YET] + * - They're reading a current, encrypted log file + * . We should proceed as usual. + * 2. User has crypto turned off: + * - They're reading an old, unencrypted log file + * . We will fail the version check. + * - They're reading a current, unencrypted log file + * . We should proceed as usual. 
+ * - They're reading an old, encrypted log file [NOT YET] + * . We'll fail the magic number check (it is encrypted). + * - They're reading a current, encrypted log file + * . We'll fail the magic number check (it is encrypted). + */ + if (CRYPTO_ON(env)) { + /* + * If we are trying to decrypt an unencrypted log + * we can only detect that by having an unreasonable + * data length for our persistent data. + */ + if ((hdr->len - hdrsize) != sizeof(LOGP)) { + __db_errx(env, "log record size mismatch"); + goto err; + } + /* + * The checksum is calculated from the encrypted data, and, + * for recent logs, the fields hdr->{prev,len}. + */ +#ifdef HAVE_LOG_CHECKSUM + if ((ret = __db_check_chksum(env, hdr, db_cipher, + &hdr->chksum[0], (u_int8_t *)persist, + hdr->len - hdrsize, is_hmac)) != 0) { + /* + * The checksum doesn't verify when the header fields + * are included; try without the header. + */ + + if ((ret = __db_check_chksum(env, NULL, db_cipher, + &hdr->chksum[0], (u_int8_t *)persist, + hdr->len - hdrsize, is_hmac)) != 0) + goto bad_checksum; + /* + * The checksum verifies without the header. Make note + * of that, because it is only acceptable when the log + * version < DB_LOGCHKSUM. Later, when we determine log + * version, we will confirm this. + */ + chksum_includes_hdr = 0; + } +#endif + + if ((ret = db_cipher->decrypt(env, db_cipher->data, + &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0) + goto err; + } + + /* Swap the header, if necessary. */ + if (LOG_SWAPPED(env)) { + /* + * If the magic number is not byte-swapped, we're looking at an + * old log that we can no longer read. + */ + if (persist->magic == DB_LOGMAGIC) { + __db_errx(env, DB_STR_A("2529", + "Ignoring log file: %s historic byte order", + "%s"), fname); + status = DB_LV_OLD_UNREADABLE; + goto err; + } + + __log_persistswap(persist); + } + + /* Validate the header. 
*/ + if (persist->magic != DB_LOGMAGIC) { + __db_errx(env, DB_STR_A("2530", + "Ignoring log file: %s: magic number %lx, not %lx", + "%s %lx %lx"), fname, + (u_long)persist->magic, (u_long)DB_LOGMAGIC); + ret = EINVAL; + goto err; + } + + logversion = persist->version; + /* + * Set our status code to indicate whether the log file belongs to an + * unreadable or readable old version; leave it alone if and only if + * the log file version is the current one. + */ + if (logversion > DB_LOGVERSION) { + /* This is a fatal error--the log file is newer than DB. */ + __db_errx(env, DB_STR_A("2531", + "Unacceptable log file %s: unsupported log version %lu", + "%s %lu"), fname, (u_long)logversion); + ret = EINVAL; + goto err; + } else if (logversion < DB_LOGOLDVER) { + status = DB_LV_OLD_UNREADABLE; + /* This is a non-fatal error, but give some feedback. */ + __db_errx(env, DB_STR_A("2532", + "Skipping log file %s: historic log version %lu", "%s %lu"), + fname, (u_long)logversion); + /* + * We don't want to set persistent info based on an unreadable + * region, so jump to "err". + */ + goto err; + } else if (logversion < DB_LOGVERSION) + status = DB_LV_OLD_READABLE; + + /* + * We could not check the checksum before checking the magic and version + * because old log headers put the length and checksum in a different + * location. + */ +#ifdef HAVE_LOG_CHECKSUM + if (CRYPTO_ON(env)) { + /* + * We might have to declare a checksum failure here, if: + * - the checksum verified only by ignoring the header, and + * - the log version indicates that the header should have + * been included. + */ + if (!chksum_includes_hdr && logversion >= DB_LOGCHKSUM) + goto bad_checksum; + } else { + /* + * The checksum was calculated with the swapped byte order. We + * might need to swap them back; the check needs the same bytes. + */ + if (LOG_SWAPPED(env)) + __log_persistswap(persist); + /* + * We have the logversion here, so we know whether to include + * the hdr or not. 
+ */ + if ((ret = __db_check_chksum(env, + logversion >= DB_LOGCHKSUM ? hdr : NULL, db_cipher, + &hdr->chksum[0], (u_int8_t *)persist, + hdr->len - hdrsize, is_hmac)) != 0) { +bad_checksum: + __db_errx(env, DB_STR("2533", + "log record checksum mismatch")); + goto err; + } + + if (LOG_SWAPPED(env)) + __log_persistswap(persist); + } +#endif + + /* + * If the log is readable so far and we're doing system initialization, + * set the region's persistent information based on the headers. + * + * Override the current log file size. + */ + if (set_persist) { + lp = dblp->reginfo.primary; + lp->log_size = persist->log_size; + lp->persist.version = logversion; + } + if (versionp != NULL) + *versionp = logversion; + +err: if (fname != NULL) + __os_free(env, fname); + if (ret == 0 && fhpp != NULL) + *fhpp = fhp; + else + /* Must close on error or if we only used it locally. */ + (void)__os_closehandle(env, fhp); + if (tmp != NULL) + __os_free(env, tmp); + + if (statusp != NULL) + *statusp = status; + + return (ret); +} + +/* + * __log_env_refresh -- + * Clean up after the log system on a close or failed open. + * + * PUBLIC: int __log_env_refresh __P((ENV *)); + */ +int +__log_env_refresh(env) + ENV *env; +{ + DB_LOG *dblp; + LOG *lp; + REGINFO *reginfo; + struct __fname *fnp; + struct __db_commit *commit; + struct __db_filestart *filestart; + int ret, t_ret; + + dblp = env->lg_handle; + reginfo = &dblp->reginfo; + lp = reginfo->primary; + ret = 0; + + /* + * Flush the log if it's private -- there's no Berkeley DB guarantee + * that this gets done, but in case the application has forgotten to + * flush for durability, it's the polite thing to do. + */ + if (F_ISSET(env, ENV_PRIVATE) && + (t_ret = __log_flush(env, NULL)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __dbreg_close_files(env, 0)) != 0 && ret == 0) + ret = t_ret; + + /* + * After we close the files, check for any unlogged closes left in + * the shared memory queue. 
If we find any, try to log it, otherwise + * return the error. We cannot say the environment was closed + * cleanly. + */ + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (F_ISSET(fnp, DB_FNAME_NOTLOGGED) && + (t_ret = __dbreg_close_id_int( + env, fnp, DBREG_CLOSE, 1)) != 0) + ret = t_ret; + MUTEX_UNLOCK(env, lp->mtx_filelist); + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. + */ + if (F_ISSET(env, ENV_PRIVATE)) { + reginfo->mtx_alloc = MUTEX_INVALID; + /* Discard the flush mutex. */ + if ((t_ret = + __mutex_free(env, &lp->mtx_flush)) != 0 && ret == 0) + ret = t_ret; + + /* Discard the buffer. */ + __env_alloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off)); + + /* Discard stack of free file IDs. */ + if (lp->free_fid_stack != INVALID_ROFF) + __env_alloc_free(reginfo, + R_ADDR(reginfo, lp->free_fid_stack)); + + /* Discard the list of in-memory log file markers. */ + while ((filestart = SH_TAILQ_FIRST(&lp->logfiles, + __db_filestart)) != NULL) { + SH_TAILQ_REMOVE(&lp->logfiles, filestart, links, + __db_filestart); + __env_alloc_free(reginfo, filestart); + } + + while ((filestart = SH_TAILQ_FIRST(&lp->free_logfiles, + __db_filestart)) != NULL) { + SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, links, + __db_filestart); + __env_alloc_free(reginfo, filestart); + } + + /* Discard commit queue elements. */ + while ((commit = SH_TAILQ_FIRST(&lp->free_commits, + __db_commit)) != NULL) { + SH_TAILQ_REMOVE(&lp->free_commits, commit, links, + __db_commit); + __env_alloc_free(reginfo, commit); + } + + /* Discard replication bulk buffer. */ + if (lp->bulk_buf != INVALID_ROFF) { + __env_alloc_free(reginfo, + R_ADDR(reginfo, lp->bulk_buf)); + lp->bulk_buf = INVALID_ROFF; + } + } + + /* Discard the per-thread DBREG mutex. 
*/ + if ((t_ret = __mutex_free(env, &dblp->mtx_dbreg)) != 0 && ret == 0) + ret = t_ret; + + /* Detach from the region. */ + if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0) + ret = t_ret; + + /* Close open files, release allocated memory. */ + if (dblp->lfhp != NULL) { + if ((t_ret = + __os_closehandle(env, dblp->lfhp)) != 0 && ret == 0) + ret = t_ret; + dblp->lfhp = NULL; + } + if (dblp->dbentry != NULL) + __os_free(env, dblp->dbentry); + + __os_free(env, dblp); + + env->lg_handle = NULL; + return (ret); +} + +/* + * __log_get_cached_ckp_lsn -- + * Retrieve any last checkpoint LSN that we may have found on startup. + * + * PUBLIC: int __log_get_cached_ckp_lsn __P((ENV *, DB_LSN *)); + */ +int +__log_get_cached_ckp_lsn(env, ckp_lsnp) + ENV *env; + DB_LSN *ckp_lsnp; +{ + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + *ckp_lsnp = lp->cached_ckp_lsn; + LOG_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __log_region_mutex_count -- + * Return the number of mutexes the log region will need. + * + * PUBLIC: u_int32_t __log_region_mutex_count __P((ENV *)); + */ +u_int32_t +__log_region_mutex_count(env) + ENV *env; +{ + /* + * We need a few assorted mutexes, and one per transaction waiting + * on the group commit list. We can't know how many that will be, + * but it should be bounded by the maximum active transactions. + */ + return (env->dbenv->tx_init + 5); +} + +/* + * __log_region_mutex_max -- + * Return the number of additional mutexes the log region will need. + * + * PUBLIC: u_int32_t __log_region_mutex_max __P((ENV *)); + */ +u_int32_t +__log_region_mutex_max(env) + ENV *env; +{ + DB_ENV *dbenv; + u_int32_t count; + + dbenv = env->dbenv; + + if ((count = dbenv->tx_max) == 0) + count = DEF_MAX_TXNS; + if (count < dbenv->tx_init) + return (0); + return (count - dbenv->tx_init); +} + +/* + * __log_region_size -- + * Return the amount of space needed for the log region. 
+ * Make the region large enough to hold txn_max transaction + * detail structures plus some space to hold thread handles + * and the beginning of the alloc region and anything we + * need for mutex system resource recording. + * PUBLIC: size_t __log_region_size __P((ENV *)); + */ +size_t +__log_region_size(env) + ENV *env; +{ + DB_ENV *dbenv; + size_t s; + + dbenv = env->dbenv; + + /* + * Set the default buffer size, if not otherwise configured. The + * default is stored back into dbenv->lg_bsize, so this sizing call + * also updates the DB_ENV configuration; the in-memory flag selects + * between LG_BSIZE_INMEM and LG_BSIZE_DEFAULT. + */ + if (dbenv->lg_bsize == 0) + dbenv->lg_bsize = FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ? + LG_BSIZE_INMEM : LG_BSIZE_DEFAULT; + + s = dbenv->lg_bsize; + /* + * Allocate the initial fileid allocation, plus some path name space: + * one allocator-sized chunk of sizeof(FNAME) + 16 bytes for each of + * the lg_fileid_init initial file IDs. + */ + s += dbenv->lg_fileid_init * __env_alloc_size((sizeof(FNAME)) + 16); + + return (s); +} +/* + * __log_region_max -- + * Return the amount of extra memory to allocate for logging information. + * + * If no initial fileid count is configured, the result is lg_regionmax, + * or LG_BASE_REGION_SIZE when lg_regionmax is also unset. Otherwise the + * result is lg_regionmax less the space taken by the initial fileid + * allocation, floored at zero (and zero when lg_regionmax is unset). + * + * NOTE(review): the per-fileid term here is + * __env_alloc_size(sizeof(FNAME)) + 16, while __log_region_size uses + * __env_alloc_size(sizeof(FNAME) + 16); the two need not be equal, so + * confirm which form is intended. + * + * PUBLIC: size_t __log_region_max __P((ENV *)); + */ +size_t +__log_region_max(env) + ENV *env; +{ + + DB_ENV *dbenv; + size_t s; + + dbenv = env->dbenv; + if (dbenv->lg_fileid_init == 0) { + if ((s = dbenv->lg_regionmax) == 0) + s = LG_BASE_REGION_SIZE; + } else if ((s = dbenv->lg_regionmax) != 0 && + s < dbenv->lg_fileid_init * (__env_alloc_size(sizeof(FNAME)) + 16)) + s = 0; + else if (s != 0) + s -= dbenv->lg_fileid_init * + (__env_alloc_size(sizeof(FNAME)) + 16); + + return (s); +} + +/* + * __log_vtruncate + * This is a virtual truncate. We set up the log indicators to + * make everyone believe that the given record is the last one in the + * log. Returns with the next valid LSN (i.e., the LSN of the next + * record to be written). This is used in replication to discard records + * in the log file that do not agree with the master. 
+ * + * PUBLIC: int __log_vtruncate __P((ENV *, DB_LSN *, DB_LSN *, DB_LSN *)); + */ +int +__log_vtruncate(env, lsn, ckplsn, trunclsn) + ENV *env; + DB_LSN *lsn, *ckplsn, *trunclsn; +{ + DBT log_dbt; + DB_LOG *dblp; + DB_LOGC *logc; + LOG *lp; + u_int32_t bytes, len; + size_t offset; + int ret, t_ret; + + /* Need to find out the length of this soon-to-be-last record. */ + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + memset(&log_dbt, 0, sizeof(log_dbt)); + ret = __logc_get(logc, lsn, &log_dbt, DB_SET); + len = logc->len; + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + + /* Now do the truncate. */ + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + + /* + * Flush the log so we can simply initialize the in-memory buffer + * after the truncate. + */ + if ((ret = __log_flush_int(dblp, NULL, 0)) != 0) + goto err; + + lp->lsn = *lsn; + lp->len = len; + lp->lsn.offset += lp->len; + + offset = lp->b_off; + if (lp->db_log_inmemory && (ret = + __log_inmem_lsnoff(dblp, &lp->lsn, &offset)) != 0) { + lp->b_off = (db_size_t)offset; + goto err; + } + lp->b_off = (db_size_t)offset; + + /* + * I am going to assume that the number of bytes written since + * the last checkpoint doesn't exceed a 32-bit number. + */ + DB_ASSERT(env, lp->lsn.file >= ckplsn->file); + bytes = 0; + if (ckplsn->file != lp->lsn.file) { + bytes = lp->log_size - ckplsn->offset; + if (lp->lsn.file > ckplsn->file + 1) + bytes += lp->log_size * + ((lp->lsn.file - ckplsn->file) - 1); + bytes += lp->lsn.offset; + } else + bytes = lp->lsn.offset - ckplsn->offset; + + lp->stat.st_wc_mbytes += bytes / MEGABYTE; + lp->stat.st_wc_bytes += bytes % MEGABYTE; + + /* + * If the synced lsn is greater than our new end of log, reset it + * to our current end of log. 
+ */ + MUTEX_LOCK(env, lp->mtx_flush); + if (LOG_COMPARE(&lp->s_lsn, lsn) > 0) + lp->s_lsn = lp->lsn; + MUTEX_UNLOCK(env, lp->mtx_flush); + + /* Initialize the in-region buffer to a pristine state. */ + ZERO_LSN(lp->f_lsn); + lp->w_off = lp->lsn.offset; + + if (trunclsn != NULL) + *trunclsn = lp->lsn; + + /* Truncate the log to the new point. */ + if ((ret = __log_zero(env, &lp->lsn)) != 0) + goto err; + +err: LOG_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __log_is_outdated -- + * Used by the replication system to identify if a client's logs are too + * old. + * + * PUBLIC: int __log_is_outdated __P((ENV *, u_int32_t, int *)); + */ +int +__log_is_outdated(env, fnum, outdatedp) + ENV *env; + u_int32_t fnum; + int *outdatedp; +{ + DB_LOG *dblp; + LOG *lp; + char *name; + int ret; + u_int32_t cfile; + struct __db_filestart *filestart; + + dblp = env->lg_handle; + + /* + * The log represented by env is compared to the file number passed + * in fnum. If the log file fnum does not exist and is lower-numbered + * than the current logs, return *outdatedp non-zero, else we return 0. + */ + if (FLD_ISSET(env->dbenv->lg_flags, DB_LOG_IN_MEMORY)) { + LOG_SYSTEM_LOCK(env); + lp = (LOG *)dblp->reginfo.primary; + filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); + *outdatedp = filestart == NULL ? 0 : (fnum < filestart->file); + LOG_SYSTEM_UNLOCK(env); + return (0); + } + + *outdatedp = 0; + if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) { + __os_free(env, name); + return (ret); + } + + /* If the file exists, we're just fine. */ + if (__os_exists(env, name, NULL) == 0) + goto out; + + /* + * It didn't exist, decide if the file number is too big or + * too little. If it's too little, then we need to indicate + * that the LSN is outdated. 
+ */ + LOG_SYSTEM_LOCK(env); + lp = (LOG *)dblp->reginfo.primary; + cfile = lp->lsn.file; + LOG_SYSTEM_UNLOCK(env); + + if (cfile > fnum) + *outdatedp = 1; +out: __os_free(env, name); + return (ret); +} + +/* + * __log_zero -- + * Zero out the tail of a log after a truncate. + * + * PUBLIC: int __log_zero __P((ENV *, DB_LSN *)); + */ +int +__log_zero(env, from_lsn) + ENV *env; + DB_LSN *from_lsn; +{ + DB_FH *fhp; + DB_LOG *dblp; + LOG *lp; + struct __db_filestart *filestart, *nextstart; + size_t nbytes, len, nw; + u_int32_t fn, mbytes, bytes; + u_int8_t buf[4096]; + int ret; + char *fname; + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + DB_ASSERT(env, LOG_COMPARE(from_lsn, &lp->lsn) <= 0); + if (LOG_COMPARE(from_lsn, &lp->lsn) > 0) { + __db_errx(env, DB_STR("2534", + "Warning: truncating to point beyond end of log")); + return (0); + } + + if (lp->db_log_inmemory) { + /* + * Remove the files that are invalidated by this truncate. + */ + for (filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); + filestart != NULL; filestart = nextstart) { + nextstart = SH_TAILQ_NEXT(filestart, + links, __db_filestart); + if (filestart->file > from_lsn->file) { + SH_TAILQ_REMOVE(&lp->logfiles, + filestart, links, __db_filestart); + SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, + filestart, links, __db_filestart); + } + } + + return (0); + } + + /* Close any open file handles so unlinks don't fail. */ + if (dblp->lfhp != NULL) { + (void)__os_closehandle(env, dblp->lfhp); + dblp->lfhp = NULL; + } + + /* Throw away any extra log files that we have around. */ + for (fn = from_lsn->file + 1;; fn++) { + if (__log_name(dblp, fn, &fname, &fhp, DB_OSO_RDONLY) != 0) { + __os_free(env, fname); + break; + } + (void)__os_closehandle(env, fhp); + (void)time(&lp->timestamp); + ret = __os_unlink(env, fname, 0); + __os_free(env, fname); + if (ret != 0) + return (ret); + } + + /* We removed some log files; have to 0 to end of file. 
*/ + if ((ret = + __log_name(dblp, from_lsn->file, &fname, &dblp->lfhp, 0)) != 0) { + __os_free(env, fname); + return (ret); + } + __os_free(env, fname); + if ((ret = __os_ioinfo(env, + NULL, dblp->lfhp, &mbytes, &bytes, NULL)) != 0) + goto err; + DB_ASSERT(env, (mbytes * MEGABYTE + bytes) >= from_lsn->offset); + len = (mbytes * MEGABYTE + bytes) - from_lsn->offset; + + memset(buf, 0, sizeof(buf)); + + /* Initialize the write position. */ + if ((ret = __os_seek(env, dblp->lfhp, 0, 0, from_lsn->offset)) != 0) + goto err; + + while (len > 0) { + nbytes = len > sizeof(buf) ? sizeof(buf) : len; + if ((ret = + __os_write(env, dblp->lfhp, buf, nbytes, &nw)) != 0) + goto err; + len -= nbytes; + } + +err: (void)__os_closehandle(env, dblp->lfhp); + dblp->lfhp = NULL; + + return (ret); +} + +/* + * __log_inmem_lsnoff -- + * Find the offset in the buffer of a given LSN. + * + * PUBLIC: int __log_inmem_lsnoff __P((DB_LOG *, DB_LSN *, size_t *)); + */ +int +__log_inmem_lsnoff(dblp, lsnp, offsetp) + DB_LOG *dblp; + DB_LSN *lsnp; + size_t *offsetp; +{ + LOG *lp; + struct __db_filestart *filestart; + + lp = (LOG *)dblp->reginfo.primary; + + SH_TAILQ_FOREACH(filestart, &lp->logfiles, links, __db_filestart) + if (filestart->file == lsnp->file) { + *offsetp = (u_int32_t) + (filestart->b_off + lsnp->offset) % lp->buffer_size; + return (0); + } + + return (DB_NOTFOUND); +} + +/* + * __log_inmem_newfile -- + * Records the offset of the beginning of a new file in the in-memory + * buffer. + * + * PUBLIC: int __log_inmem_newfile __P((DB_LOG *, u_int32_t)); + */ +int +__log_inmem_newfile(dblp, file) + DB_LOG *dblp; + u_int32_t file; +{ + HDR hdr; + LOG *lp; + struct __db_filestart *filestart; + int ret; +#ifdef DIAGNOSTIC + struct __db_filestart *first, *last; +#endif + + lp = (LOG *)dblp->reginfo.primary; + + /* + * If the log buffer is empty, reuse the filestart entry. 
+ */ + filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); + if (filestart != NULL && + RINGBUF_LEN(lp, filestart->b_off, lp->b_off) <= + sizeof(HDR) + sizeof(LOGP)) { + filestart->file = file; + filestart->b_off = lp->b_off; + return (0); + } + + /* + * We write an empty header at the end of every in-memory log file. + * This is used during cursor traversal to indicate when to switch the + * LSN to the next file. + */ + if (file > 1) { + memset(&hdr, 0, sizeof(HDR)); + __log_inmem_copyin(dblp, lp->b_off, &hdr, sizeof(HDR)); + lp->b_off = (lp->b_off + sizeof(HDR)) % lp->buffer_size; + } + + filestart = SH_TAILQ_FIRST(&lp->free_logfiles, __db_filestart); + if (filestart == NULL) { + if ((ret = __env_alloc(&dblp->reginfo, + sizeof(struct __db_filestart), &filestart)) != 0) + return (ret); + memset(filestart, 0, sizeof(*filestart)); + } else + SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, + links, __db_filestart); + + filestart->file = file; + filestart->b_off = lp->b_off; + +#ifdef DIAGNOSTIC + first = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); + last = SH_TAILQ_LAST(&(lp)->logfiles, links, __db_filestart); + + /* Check that we don't wrap. */ + DB_ASSERT(dblp->env, !first || first == last || + RINGBUF_LEN(lp, first->b_off, lp->b_off) == + RINGBUF_LEN(lp, first->b_off, last->b_off) + + RINGBUF_LEN(lp, last->b_off, lp->b_off)); +#endif + + SH_TAILQ_INSERT_TAIL(&lp->logfiles, filestart, links); + return (0); +} + +/* + * __log_inmem_chkspace -- + * Ensure that the requested amount of space is available in the buffer, + * and invalidate the region. + * Note: assumes that the region lock is held on entry. 
+ * + * PUBLIC: int __log_inmem_chkspace __P((DB_LOG *, size_t)); + */ +int +__log_inmem_chkspace(dblp, len) + DB_LOG *dblp; + size_t len; +{ + DB_LSN active_lsn, old_active_lsn; + ENV *env; + LOG *lp; + struct __db_filestart *filestart; + size_t offset; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + DB_ASSERT(env, lp->db_log_inmemory); + + /* + * Allow room for an extra header so that we don't need to check for + * space when switching files. + */ + len += sizeof(HDR); + + /* + * If transactions are enabled and we're about to fill available space, + * update the active LSN and recheck. If transactions aren't enabled, + * don't even bother checking: in that case we can always overwrite old + * log records, because we're never going to abort. + */ + while (TXN_ON(env) && + RINGBUF_LEN(lp, lp->b_off, lp->a_off) <= len) { + old_active_lsn = lp->active_lsn; + active_lsn = lp->lsn; + + /* + * Drop the log region lock so we don't hold it while + * taking the transaction region lock. + */ + LOG_SYSTEM_UNLOCK(env); + ret = __txn_getactive(env, &active_lsn); + LOG_SYSTEM_LOCK(env); + if (ret != 0) + return (ret); + active_lsn.offset = 0; + + /* If we didn't make any progress, give up. */ + if (LOG_COMPARE(&active_lsn, &old_active_lsn) == 0) { + __db_errx(env, DB_STR("2535", +"In-memory log buffer is full (an active transaction spans the buffer)")); + return (DB_LOG_BUFFER_FULL); + } + + /* Make sure we're moving the region LSN forwards. */ + if (LOG_COMPARE(&active_lsn, &lp->active_lsn) > 0) { + lp->active_lsn = active_lsn; + offset = lp->a_off; + (void)__log_inmem_lsnoff(dblp, &active_lsn, &offset); + lp->a_off = (db_size_t)offset; + } + } + + /* + * Remove the first file if it is invalidated by this write. + * Log records can't be bigger than a file, so we only need to + * check the first file. 
+ */ + filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); + if (filestart != NULL && + RINGBUF_LEN(lp, lp->b_off, filestart->b_off) <= len) { + SH_TAILQ_REMOVE(&lp->logfiles, filestart, + links, __db_filestart); + SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, filestart, + links, __db_filestart); + lp->f_lsn.file = filestart->file + 1; + } + + return (0); +} + +/* + * __log_inmem_copyout -- + * Copies the given number of bytes from the buffer -- no checking. + * Note: assumes that the region lock is held on entry. + * + * PUBLIC: void __log_inmem_copyout __P((DB_LOG *, size_t, void *, size_t)); + */ +void +__log_inmem_copyout(dblp, offset, buf, size) + DB_LOG *dblp; + size_t offset; + void *buf; + size_t size; +{ + LOG *lp; + size_t nbytes; + + lp = (LOG *)dblp->reginfo.primary; + nbytes = (offset + size < lp->buffer_size) ? + size : lp->buffer_size - offset; + memcpy(buf, dblp->bufp + offset, nbytes); + if (nbytes < size) + memcpy((u_int8_t *)buf + nbytes, dblp->bufp, size - nbytes); +} + +/* + * __log_inmem_copyin -- + * Copies the given number of bytes into the buffer -- no checking. + * Note: assumes that the region lock is held on entry. + * + * PUBLIC: void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t)); + */ +void +__log_inmem_copyin(dblp, offset, buf, size) + DB_LOG *dblp; + size_t offset; + void *buf; + size_t size; +{ + LOG *lp; + size_t nbytes; + + lp = (LOG *)dblp->reginfo.primary; + nbytes = (offset + size < lp->buffer_size) ? + size : lp->buffer_size - offset; + memcpy(dblp->bufp + offset, buf, nbytes); + if (nbytes < size) + memcpy(dblp->bufp, (u_int8_t *)buf + nbytes, size - nbytes); +} + +/* + * __log_set_version -- + * Sets the current version of the log subsystem to the given version. + * Essentially this modifies the lp->persist.version field in the + * shared memory region. Called when region is initially created + * and when replication is starting up or finds a new master. 
+ * + * PUBLIC: void __log_set_version __P((ENV *, u_int32_t)); + */ +void +__log_set_version(env, newver) + ENV *env; + u_int32_t newver; +{ + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + /* + * We should be able to update this atomically without locking. + */ + lp->persist.version = newver; +} + +/* + * __log_get_oldversion -- + * Returns the last version of log that this environment was working + * with. Since there could be several versions of log files, if + * the user upgraded and didn't log archive, we check the version + * of the first log file, compare it to the last log file. If those + * are different, then there is an older log existing, and we then + * walk backward in the log files looking for the version of the + * most recent older log file. + * + * PUBLIC: int __log_get_oldversion __P((ENV *, u_int32_t *)); + */ +int +__log_get_oldversion(env, ver) + ENV *env; + u_int32_t *ver; +{ + DBT rec; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN lsn; + LOG *lp; + u_int32_t firstfnum, fnum, lastver, oldver; + int ret, t_ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + logc = NULL; + ret = 0; + oldver = DB_LOGVERSION; + /* + * If we're in-memory logs we're always the current version. + */ + if (lp->db_log_inmemory) { + *ver = oldver; + return (0); + } + memset(&rec, 0, sizeof(rec)); + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + /* + * Get the version numbers of the first and last log files. + */ + if ((ret = __logc_get(logc, &lsn, &rec, DB_FIRST)) != 0) { + /* + * If there is no log file, we'll get DB_NOTFOUND. + * If we get that, set the version to the current. 
+ */ + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + firstfnum = lsn.file; + if ((ret = __logc_get(logc, &lsn, &rec, DB_LAST)) != 0) + goto err; + if ((ret = __log_valid(dblp, firstfnum, 0, NULL, 0, + NULL, &oldver)) != 0) + goto err; + /* + * If the first and last LSN are in the same file, then we + * already have the version in oldver. Return it. + */ + if (firstfnum == lsn.file) + goto err; + + /* + * Otherwise they're in different files and we call __log_valid + * to get the version numbers in both files. + */ + if ((ret = __log_valid(dblp, lsn.file, 0, NULL, 0, + NULL, &lastver)) != 0) + goto err; + /* + * If the version numbers are different, walk backward getting + * the version of each log file until we find one that is + * different than the last. + */ + if (oldver != lastver) { + for (fnum = lsn.file - 1; fnum >= firstfnum; fnum--) { + if ((ret = __log_valid(dblp, fnum, 0, NULL, 0, + NULL, &oldver)) != 0) + goto err; + if (oldver != lastver) + break; + } + } +err: if (logc != NULL && ((t_ret = __logc_close(logc)) != 0) && ret == 0) + ret = t_ret; + if (ret == 0 && ver != NULL) + *ver = oldver; + return (ret); +} diff --git a/src/log/log_archive.c b/src/log/log_archive.c new file mode 100644 index 00000000..280a2071 --- /dev/null +++ b/src/log/log_archive.c @@ -0,0 +1,643 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/log.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __absname __P((ENV *, char *, char *, char **)); +static int __build_data __P((ENV *, char *, char ***)); +static int __cmpfunc __P((const void *, const void *)); +static int __usermem __P((ENV *, char ***)); + +/* + * __log_archive_pp -- + * ENV->log_archive pre/post processing. 
+ * + * PUBLIC: int __log_archive_pp __P((DB_ENV *, char **[], u_int32_t)); + */ +int +__log_archive_pp(dbenv, listp, flags) + DB_ENV *dbenv; + char ***listp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_archive", DB_INIT_LOG); + +#undef OKFLAGS +#define OKFLAGS (DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG | DB_ARCH_REMOVE) + if (flags != 0) { + if ((ret = __db_fchk( + env, "DB_ENV->log_archive", flags, OKFLAGS)) != 0) + return (ret); + if ((ret = __db_fcchk(env, "DB_ENV->log_archive", + flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0) + return (ret); + if ((ret = __db_fcchk(env, "DB_ENV->log_archive", + flags, DB_ARCH_REMOVE, + DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG)) != 0) + return (ret); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_archive(env, listp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_archive -- + * ENV->log_archive. Internal. + * PUBLIC: int __log_archive __P((ENV *, char **[], u_int32_t)); + */ +int +__log_archive(env, listp, flags) + ENV *env; + char ***listp; + u_int32_t flags; +{ + DBT rec; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN stable_lsn; + LOG *lp; + u_int array_size, n; + u_int32_t fnum; + int handle_check, ret, t_ret; + char **array, **arrayp, *name, *p, *pref; +#ifdef HAVE_GETCWD + char path[DB_MAXPATHLEN]; +#endif + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + array = NULL; + name = NULL; + ret = 0; + COMPQUIET(fnum, 0); + + if (flags != DB_ARCH_REMOVE) + *listp = NULL; + + /* There are no log files if logs are in memory. */ + if (lp->db_log_inmemory) { + LF_CLR(~DB_ARCH_DATA); + if (flags == 0) + return (0); + } + + /* + * Check if the user wants the list of log files to remove and we're + * at a bad time in replication initialization. + */ + handle_check = 0; + if (!LF_ISSET(DB_ARCH_DATA) && + !LF_ISSET(DB_ARCH_LOG)) { + /* + * If we're locked out, just return success. 
No files + * can be archived right now. Any other error pass back + * to the caller. + */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __archive_rep_enter(env)) != 0) { + if (ret == DB_REP_LOCKOUT) + ret = 0; + return (ret); + } + } + + /* + * Prepend the original absolute pathname if the user wants an + * absolute path to the database environment directory. + */ +#ifdef HAVE_GETCWD + if (LF_ISSET(DB_ARCH_ABS)) { + /* + * XXX + * Can't trust getcwd(3) to set a valid errno, so don't display + * one unless we know it's good. It's likely a permissions + * problem: use something bland and useless in the default + * return value, so we don't send somebody off in the wrong + * direction. + */ + __os_set_errno(0); + if (getcwd(path, sizeof(path)) == NULL) { + ret = __os_get_errno(); + __db_err(env, ret, DB_STR("2570", + "no absolute path for the current directory")); + goto err; + } + pref = path; + } else +#endif + pref = NULL; + + LF_CLR(DB_ARCH_ABS); + switch (flags) { + case DB_ARCH_DATA: + ret = __build_data(env, pref, listp); + goto err; + case DB_ARCH_LOG: + memset(&rec, 0, sizeof(rec)); + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; +#ifdef UMRW + ZERO_LSN(stable_lsn); +#endif + ret = __logc_get(logc, &stable_lsn, &rec, DB_LAST); + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + fnum = stable_lsn.file; + break; + case DB_ARCH_REMOVE: + __log_autoremove(env); + goto err; + case 0: + + ret = __log_get_stable_lsn(env, &stable_lsn, 1); + /* + * A return of DB_NOTFOUND means the checkpoint LSN + * is before the beginning of the log files we have. + * This is not an error; it just means we're done. + */ + if (ret != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + /* Remove any log files before the last stable LSN. 
*/ + fnum = stable_lsn.file - 1; + break; + default: + ret = __db_unknown_path(env, "__log_archive"); + goto err; + } + +#define LIST_INCREMENT 64 + /* Get some initial space. */ + array_size = 64; + if ((ret = __os_malloc(env, + sizeof(char *) * array_size, &array)) != 0) + goto err; + array[0] = NULL; + + /* Build an array of the file names. */ + for (n = 0; fnum > 0; --fnum) { + if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) { + __os_free(env, name); + goto err; + } + if (__os_exists(env, name, NULL) != 0) { + __os_free(env, name); + name = NULL; + if (LF_ISSET(DB_ARCH_LOG) && fnum == stable_lsn.file) + continue; + break; + } + + if (n >= array_size - 2) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(env, + sizeof(char *) * array_size, &array)) != 0) + goto err; + } + + if (pref != NULL) { + if ((ret = + __absname(env, pref, name, &array[n])) != 0) + goto err; + __os_free(env, name); + } else if ((p = __db_rpath(name)) != NULL) { + if ((ret = __os_strdup(env, p + 1, &array[n])) != 0) + goto err; + __os_free(env, name); + } else + array[n] = name; + + name = NULL; + array[++n] = NULL; + } + + /* If there's nothing to return, we're done. */ + if (n == 0) + goto err; + + /* Sort the list. */ + qsort(array, (size_t)n, sizeof(char *), __cmpfunc); + + /* Rework the memory. */ + if ((ret = __usermem(env, &array)) != 0) + goto err; + + if (listp != NULL) + *listp = array; + + if (0) { +err: if (array != NULL) { + for (arrayp = array; *arrayp != NULL; ++arrayp) + __os_free(env, *arrayp); + __os_free(env, array); + } + if (name != NULL) + __os_free(env, name); + } + if (handle_check && (t_ret = __archive_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __log_get_stable_lsn -- + * Get the stable lsn based on where checkpoints are. 
+ * + * PUBLIC: int __log_get_stable_lsn __P((ENV *, DB_LSN *, int)); + */ +int +__log_get_stable_lsn(env, stable_lsn, group_wide) + ENV *env; + DB_LSN *stable_lsn; + int group_wide; +{ + DBT rec; + DB_LOGC *logc; + LOG *lp; + __txn_ckp_args *ckp_args; + int ret, t_ret; + + lp = env->lg_handle->reginfo.primary; + + ret = 0; + memset(&rec, 0, sizeof(rec)); + if (!TXN_ON(env)) { + if ((ret = __log_get_cached_ckp_lsn(env, stable_lsn)) != 0) + goto err; + /* + * No need to check for a return value of DB_NOTFOUND; + * __txn_findlastckp returns 0 if no checkpoint record + * is found. Instead of checking the return value, we + * check to see if the return LSN has been filled in. + */ + if (IS_ZERO_LSN(*stable_lsn) && (ret = + __txn_findlastckp(env, stable_lsn, NULL)) != 0) + goto err; + /* + * If the LSN has not been filled in return DB_NOTFOUND + * so that the caller knows it may be done. + */ + if (IS_ZERO_LSN(*stable_lsn)) { + ret = DB_NOTFOUND; + goto err; + } + } else if ((ret = __txn_getckp(env, stable_lsn)) != 0) + goto err; + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + /* + * Read checkpoint records until we find one that is on disk, + * then copy the ckp_lsn to the stable_lsn; + */ + while ((ret = __logc_get(logc, stable_lsn, &rec, DB_SET)) == 0 && + (ret = __txn_ckp_read(env, rec.data, &ckp_args)) == 0) { + if (stable_lsn->file < lp->s_lsn.file || + (stable_lsn->file == lp->s_lsn.file && + stable_lsn->offset < lp->s_lsn.offset)) { + *stable_lsn = ckp_args->ckp_lsn; + __os_free(env, ckp_args); + break; + } + *stable_lsn = ckp_args->last_ckp; + __os_free(env, ckp_args); + } + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; +#ifdef HAVE_REPLICATION_THREADS + /* + * If we have RepMgr, get the minimum group-aware LSN. 
+ */ + if (group_wide && ret == 0 && REP_ON(env) && APP_IS_REPMGR(env) && + (t_ret = __repmgr_stable_lsn(env, stable_lsn)) != 0) + ret = t_ret; +#else + COMPQUIET(group_wide, 0); +#endif +err: + return (ret); +} + +/* + * __log_autoremove -- + * Delete any non-essential log files. + * + * PUBLIC: void __log_autoremove __P((ENV *)); + */ +void +__log_autoremove(env) + ENV *env; +{ + int ret; + char **begin, **list; + + /* + * Complain if there's an error, but don't return the error to our + * caller. Auto-remove is done when writing a log record, and we + * don't want to fail a write, which could fail the corresponding + * committing transaction, for a permissions error. + */ + if ((ret = __log_archive(env, &list, DB_ARCH_ABS)) != 0) { + if (ret != DB_NOTFOUND) + __db_err(env, ret, DB_STR("2571", + "log file auto-remove")); + return; + } + + /* Remove the files. */ + if (list != NULL) { + for (begin = list; *list != NULL; ++list) + (void)__os_unlink(env, *list, 0); + __os_ufree(env, begin); + } +} + +/* + * __build_data -- + * Build a list of datafiles for return. + */ +static int +__build_data(env, pref, listp) + ENV *env; + char *pref, ***listp; +{ + DBT rec; + DB_LOGC *logc; + DB_LSN lsn; + __dbreg_register_args *argp; + u_int array_size, last, n, nxt; + u_int32_t rectype; + int ret, t_ret; + char **array, **arrayp, **list, **lp, *p, *real_name; + + /* Get some initial space. 
*/ + array_size = 64; + if ((ret = __os_malloc(env, + sizeof(char *) * array_size, &array)) != 0) + return (ret); + array[0] = NULL; + + memset(&rec, 0, sizeof(rec)); + /* + * The name array is already allocated: on failure leave through + * err1 so it is released rather than leaked. + */ + if ((ret = __log_cursor(env, &logc)) != 0) + goto err1; + /* Collect the file name from each dbreg_register record returned. */ + for (n = 0; (ret = __logc_get(logc, &lsn, &rec, DB_PREV)) == 0;) { + if (rec.size < sizeof(rectype)) { + ret = EINVAL; + __db_errx(env, DB_STR("2572", + "DB_ENV->log_archive: bad log record")); + break; + } + + LOGCOPY_32(env, &rectype, rec.data); + if (rectype != DB___dbreg_register) + continue; + if ((ret = + __dbreg_register_read(env, rec.data, &argp)) != 0) { + ret = EINVAL; + __db_errx(env, DB_STR("2573", + "DB_ENV->log_archive: unable to read log record")); + break; + } + + /* Grow the array, keeping room for the terminating NULL. */ + if (n >= array_size - 2) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(env, + sizeof(char *) * array_size, &array)) != 0) + goto free_continue; + } + + if ((ret = __os_strdup(env, + argp->name.data, &array[n++])) != 0) + goto free_continue; + array[n] = NULL; + + /* Queue databases: add the names from __qam_extent_names too. */ + if (argp->ftype == DB_QUEUE) { + if ((ret = __qam_extent_names(env, + argp->name.data, &list)) != 0) + goto q_err; + for (lp = list; + lp != NULL && *lp != NULL; lp++) { + if (n >= array_size - 2) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(env, + sizeof(char *) * + array_size, &array)) != 0) + goto q_err; + } + if ((ret = + __os_strdup(env, *lp, &array[n++])) != 0) + goto q_err; + array[n] = NULL; + } +q_err: if (list != NULL) + __os_free(env, list); + } +free_continue: __os_free(env, argp); + if (ret != 0) + break; + } + if (ret == DB_NOTFOUND) + ret = 0; + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err1; + + /* If there's nothing to return, we're done. */ + if (n == 0) { + ret = 0; + *listp = NULL; + goto err1; + } + + /* Sort the list. */ + qsort(array, (size_t)n, sizeof(char *), __cmpfunc); + + /* + * Build the real pathnames, discarding nonexistent files and + * duplicates. 
+ */ + for (last = nxt = 0; nxt < n;) { + /* + * Discard duplicates. Last is the next slot we're going + * to return to the user, nxt is the next slot that we're + * going to consider. + */ + if (last != nxt) { + array[last] = array[nxt]; + array[nxt] = NULL; + } + for (++nxt; nxt < n && + strcmp(array[last], array[nxt]) == 0; ++nxt) { + __os_free(env, array[nxt]); + array[nxt] = NULL; + } + + /* Get the real name. */ + if ((ret = __db_appname(env, + DB_APP_DATA, array[last], NULL, &real_name)) != 0) + goto err2; + + /* If the file doesn't exist, ignore it. */ + if (__os_exists(env, real_name, NULL) != 0) { + __os_free(env, real_name); + __os_free(env, array[last]); + array[last] = NULL; + continue; + } + + /* Rework the name as requested by the user. */ + __os_free(env, array[last]); + array[last] = NULL; + if (pref != NULL) { + ret = __absname(env, pref, real_name, &array[last]); + __os_free(env, real_name); + if (ret != 0) + goto err2; + } else if ((p = __db_rpath(real_name)) != NULL) { + ret = __os_strdup(env, p + 1, &array[last]); + __os_free(env, real_name); + if (ret != 0) + goto err2; + } else + array[last] = real_name; + ++last; + } + + /* NULL-terminate the list. */ + array[last] = NULL; + + /* Rework the memory. */ + if ((ret = __usermem(env, &array)) != 0) + goto err1; + + *listp = array; + return (0); + +err2: /* + * XXX + * We've possibly inserted NULLs into the array list, so clean up a + * bit so that the other error processing works. + */ + if (array != NULL) + for (; nxt < n; ++nxt) + __os_free(env, array[nxt]); + /* FALLTHROUGH */ + +err1: if (array != NULL) { + for (arrayp = array; *arrayp != NULL; ++arrayp) + __os_free(env, *arrayp); + __os_free(env, array); + } + return (ret); +} + +/* + * __absname -- + * Return an absolute path name for the file. 
+ */ +static int +__absname(env, pref, name, newnamep) + ENV *env; + char *pref, *name, **newnamep; +{ + size_t l_pref, l_name; + int isabspath, ret; + char *newname; + + l_name = strlen(name); + isabspath = __os_abspath(name); + l_pref = isabspath ? 0 : strlen(pref); + + /* Malloc space for concatenating the two. */ + if ((ret = __os_malloc(env, + l_pref + l_name + 2, &newname)) != 0) + return (ret); + *newnamep = newname; + + /* Build the name. If `name' is an absolute path, ignore any prefix. */ + if (!isabspath) { + memcpy(newname, pref, l_pref); + if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL) + newname[l_pref++] = PATH_SEPARATOR[0]; + } + memcpy(newname + l_pref, name, l_name + 1); + + return (0); +} + +/* + * __usermem -- + * Create a single chunk of memory that holds the returned information. + * If the user has their own malloc routine, use it. + */ +static int +__usermem(env, listp) + ENV *env; + char ***listp; +{ + size_t len; + int ret; + char **array, **arrayp, **orig, *strp; + + /* Find out how much space we need. */ + for (len = 0, orig = *listp; *orig != NULL; ++orig) + len += sizeof(char *) + strlen(*orig) + 1; + len += sizeof(char *); + + /* Allocate it and set up the pointers. */ + if ((ret = __os_umalloc(env, len, &array)) != 0) + return (ret); + + strp = (char *)(array + (orig - *listp) + 1); + + /* Copy the original information into the new memory. */ + for (orig = *listp, arrayp = array; *orig != NULL; ++orig, ++arrayp) { + len = strlen(*orig); + memcpy(strp, *orig, len + 1); + *arrayp = strp; + strp += len + 1; + + __os_free(env, *orig); + } + + /* NULL-terminate the list. 
*/ + *arrayp = NULL; + + __os_free(env, *listp); + *listp = array; + + return (0); +} + +static int +__cmpfunc(p1, p2) + const void *p1, *p2; +{ + return (strcmp(*((char * const *)p1), *((char * const *)p2))); +} diff --git a/src/log/log_compare.c b/src/log/log_compare.c new file mode 100644 index 00000000..97b59338 --- /dev/null +++ b/src/log/log_compare.c @@ -0,0 +1,66 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" + +/* + * log_compare -- + * Compare two LSN's; return 1, 0, -1 if first is >, == or < second. + * + * EXTERN: int log_compare __P((const DB_LSN *, const DB_LSN *)); + */ +int +log_compare(lsn0, lsn1) + const DB_LSN *lsn0, *lsn1; +{ + return (LOG_COMPARE(lsn0, lsn1)); +} + +/* + * __log_check_page_lsn -- + * Complain and return EINVAL if the page's LSN is at or past the end + * of the current log; return 0 otherwise. + * + * PUBLIC: int __log_check_page_lsn __P((ENV *, DB *, DB_LSN *)); + */ +int +__log_check_page_lsn(env, dbp, lsnp) + ENV *env; + DB *dbp; + DB_LSN *lsnp; +{ + LOG *lp; + int ret; + + lp = env->lg_handle->reginfo.primary; + LOG_SYSTEM_LOCK(env); + + ret = LOG_COMPARE(lsnp, &lp->lsn); + + LOG_SYSTEM_UNLOCK(env); + + if (ret < 0) + return (0); + + __db_errx(env, DB_STR_A("2506", + "file %s has LSN %lu/%lu, past end of log at %lu/%lu", + "%s %lu %lu %lu %lu"), + dbp == NULL || + dbp->fname == NULL ? 
DB_STR_P("unknown") : dbp->fname, + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)lp->lsn.file, (u_long)lp->lsn.offset); + __db_errx(env, DB_STR("2507", + "Commonly caused by moving a database from one database environment")); + __db_errx(env, DB_STR("2508", + "to another without clearing the database LSNs, or by removing all of")); + __db_errx(env, DB_STR("2509", + "the log files from a database environment")); + return (EINVAL); +} diff --git a/src/log/log_debug.c b/src/log/log_debug.c new file mode 100644 index 00000000..32fb2542 --- /dev/null +++ b/src/log/log_debug.c @@ -0,0 +1,146 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +static int __log_printf_int __P((ENV *, DB_TXN *, const char *, va_list)); + +/* + * __log_printf_capi -- + * Write a printf-style format string into the DB log. + * + * PUBLIC: int __log_printf_capi __P((DB_ENV *, DB_TXN *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +int +#ifdef STDC_HEADERS +__log_printf_capi(DB_ENV *dbenv, DB_TXN *txnid, const char *fmt, ...) +#else +__log_printf_capi(dbenv, txnid, fmt, va_alist) + DB_ENV *dbenv; + DB_TXN *txnid; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + int ret; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + ret = __log_printf_pp(dbenv, txnid, fmt, ap); + va_end(ap); + + return (ret); +} + +/* + * __log_printf_pp -- + * Handle the arguments and call an internal routine to do the work. + * + * The reason this routine isn't just folded into __log_printf_capi + * is because the C++ API has to call a C API routine, and you can + * only pass variadic arguments to a single routine. 
+ * + * PUBLIC: int __log_printf_pp + * PUBLIC: __P((DB_ENV *, DB_TXN *, const char *, va_list)); + */ +int +__log_printf_pp(dbenv, txnid, fmt, ap) + DB_ENV *dbenv; + DB_TXN *txnid; + const char *fmt; + va_list ap; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_printf", DB_INIT_LOG); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_printf_int(env, txnid, fmt, ap)), 0, ret); + va_end(ap); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_printf -- + * Write a printf-style format string into the DB log. + * + * PUBLIC: int __log_printf __P((ENV *, DB_TXN *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +int +#ifdef STDC_HEADERS +__log_printf(ENV *env, DB_TXN *txnid, const char *fmt, ...) +#else +__log_printf(env, txnid, fmt, va_alist) + ENV *env; + DB_TXN *txnid; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + int ret; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + ret = __log_printf_int(env, txnid, fmt, ap); + va_end(ap); + + return (ret); +} + +/* + * __log_printf_int -- + * Write a printf-style format string into the DB log (internal). + */ +static int +__log_printf_int(env, txnid, fmt, ap) + ENV *env; + DB_TXN *txnid; + const char *fmt; + va_list ap; +{ + DBT opdbt, msgdbt; + DB_LSN lsn; + char __logbuf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. 
*/ + + if (!DBENV_LOGGING(env)) { + __db_errx(env, DB_STR("2510", + "Logging not currently permitted")); + return (EAGAIN); + } + + memset(&opdbt, 0, sizeof(opdbt)); + opdbt.data = "DIAGNOSTIC"; + opdbt.size = sizeof("DIAGNOSTIC") - 1; + + memset(&msgdbt, 0, sizeof(msgdbt)); + msgdbt.data = __logbuf; + msgdbt.size = (u_int32_t)vsnprintf(__logbuf, sizeof(__logbuf), fmt, ap); + + return (__db_debug_log( + env, txnid, &lsn, 0, &opdbt, -1, &msgdbt, NULL, 0)); +} diff --git a/src/log/log_get.c b/src/log/log_get.c new file mode 100644 index 00000000..db30c969 --- /dev/null +++ b/src/log/log_get.c @@ -0,0 +1,1626 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/hmac.h" +#include "dbinc/log.h" +#include "dbinc/hash.h" + +typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK; + +static int __logc_close_pp __P((DB_LOGC *, u_int32_t)); +static int __logc_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); +static int __logc_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); +static int __logc_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *)); +static int __logc_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **)); +static int __logc_inregion __P((DB_LOGC *, + DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **, int *)); +static int __logc_io __P((DB_LOGC *, + u_int32_t, u_int32_t, void *, size_t *, int *)); +static int __logc_ondisk __P((DB_LOGC *, + DB_LSN *, DB_LSN *, u_int32_t, HDR *, u_int8_t **, int *)); +static int __logc_set_maxrec __P((DB_LOGC *, char *)); +static int __logc_shortread __P((DB_LOGC *, DB_LSN *, int)); +static int __logc_version_pp __P((DB_LOGC *, u_int32_t *, u_int32_t)); + +/* + * __log_cursor_pp -- + * ENV->log_cursor + * + * PUBLIC: int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t)); + */ +int 
+__log_cursor_pp(dbenv, logcp, flags) + DB_ENV *dbenv; + DB_LOGC **logcp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG); + + /* Validate arguments. */ + if ((ret = __db_fchk(env, "DB_ENV->log_cursor", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_cursor(env, logcp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_cursor -- + * Create a log cursor. + * + * PUBLIC: int __log_cursor __P((ENV *, DB_LOGC **)); + */ +int +__log_cursor(env, logcp) + ENV *env; + DB_LOGC **logcp; +{ + DB_LOGC *logc; + int ret; + + *logcp = NULL; + + /* Allocate memory for the cursor. */ + if ((ret = __os_calloc(env, 1, sizeof(DB_LOGC), &logc)) != 0) + return (ret); + + logc->bp_size = LG_CURSOR_BUF_SIZE; + /* + * Set this to something positive. + */ + logc->bp_maxrec = MEGABYTE; + if ((ret = __os_malloc(env, logc->bp_size, &logc->bp)) != 0) { + __os_free(env, logc); + return (ret); + } + + logc->env = env; + logc->close = __logc_close_pp; + logc->get = __logc_get_pp; + logc->version = __logc_version_pp; + + *logcp = logc; + return (0); +} + +/* + * __logc_close_pp -- + * DB_LOGC->close pre/post processing. + */ +static int +__logc_close_pp(logc, flags) + DB_LOGC *logc; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = logc->env; + + if ((ret = __db_fchk(env, "DB_LOGC->close", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__logc_close(logc)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __logc_close -- + * DB_LOGC->close. 
 *
 *	Closes the cursor's file handle if one is open, frees the cursor's
 *	private DBT memory and read buffer, then frees the cursor itself.
 *	Always returns 0; a failure to close the file handle is ignored.
 *
 * PUBLIC: int __logc_close __P((DB_LOGC *));
 */
int
__logc_close(logc)
	DB_LOGC *logc;
{
	ENV *env;

	env = logc->env;

	if (logc->fhp != NULL) {
		(void)__os_closehandle(env, logc->fhp);
		logc->fhp = NULL;
	}

	if (logc->dbt.data != NULL)
		__os_free(env, logc->dbt.data);

	__os_free(env, logc->bp);
	__os_free(env, logc);

	return (0);
}

/*
 * __logc_version_pp --
 *	DB_LOGC->version pre/post processing: check the flags (none are
 *	accepted), then run __logc_version() inside the environment
 *	enter/leave wrappers.
 */
static int
__logc_version_pp(logc, versionp, flags)
	DB_LOGC *logc;
	u_int32_t *versionp;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret;

	env = logc->env;

	if ((ret = __db_fchk(env, "DB_LOGC->version", flags, 0)) != 0)
		return (ret);

	ENV_ENTER(env, ip);
	REPLICATION_WRAP(env, (__logc_version(logc, versionp)), 0, ret);
	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __logc_version --
 *	DB_LOGC->version.
 *
 *	Store in *versionp the log version of the file the cursor is
 *	positioned in.  The cursor must already be positioned; an unset
 *	(zero LSN) cursor yields EINVAL.  *versionp is written only on
 *	success.
 *
 * PUBLIC: int __logc_version __P((DB_LOGC *, u_int32_t *));
 */
int
__logc_version(logc, versionp)
	DB_LOGC *logc;
	u_int32_t *versionp;
{
	DBT hdrdbt;
	DB_LOGC *plogc;
	DB_LSN plsn;
	ENV *env;
	LOGP *persist;
	int ret, t_ret;

	env = logc->env;
	if (IS_ZERO_LSN(logc->lsn)) {
		/* NOTE(review): the message names DB_LOGC->get, not ->version. */
		__db_errx(env, DB_STR("2574", "DB_LOGC->get: unset cursor"));
		return (EINVAL);
	}
	ret = 0;
	/*
	 * Check if the persist info we have is for the same file
	 * as the current cursor position.  If we already have the
	 * information, then we're done.  If not, we open a new
	 * log cursor and get the header.
	 *
	 * Since most users walk forward through the log when
	 * using this feature (i.e. printlog) we're likely to
	 * have the information we need.
	 */
	if (logc->lsn.file != logc->p_lsn.file) {
		if ((ret = __log_cursor(env, &plogc)) != 0)
			return (ret);
		/* The persistent header is the record at offset 0 of the file. */
		plsn.file = logc->lsn.file;
		plsn.offset = 0;
		plogc->lsn = plsn;
		memset(&hdrdbt, 0, sizeof(DBT));
		if ((ret = __logc_get_int(plogc,
		    &plsn, &hdrdbt, DB_SET)) == 0) {
			persist = (LOGP *)hdrdbt.data;
			if (LOG_SWAPPED(env))
				__log_persistswap(persist);
			/* Cache the result on the caller's cursor. */
			logc->p_lsn = logc->lsn;
			logc->p_version = persist->version;
		}
		/* Always close the temporary cursor; keep the first error. */
		if ((t_ret = __logc_close(plogc)) != 0 && ret == 0)
			ret = t_ret;
	}
	/* Return the version. */
	if (ret == 0)
		*versionp = logc->p_version;
	return (ret);
}

/*
 * __logc_get_pp --
 *	DB_LOGC->get pre/post processing.
 *
 *	Accepts exactly one of DB_CURRENT, DB_FIRST, DB_LAST, DB_NEXT,
 *	DB_PREV or DB_SET; DB_SET additionally requires a non-zero LSN in
 *	*alsn.  Anything else is rejected before __logc_get() is called.
 */
static int
__logc_get_pp(logc, alsn, dbt, flags)
	DB_LOGC *logc;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret;

	env = logc->env;

	/* Validate arguments. */
	switch (flags) {
	case DB_CURRENT:
	case DB_FIRST:
	case DB_LAST:
	case DB_NEXT:
	case DB_PREV:
		break;
	case DB_SET:
		if (IS_ZERO_LSN(*alsn)) {
			__db_errx(env, DB_STR_A("2575",
		    "DB_LOGC->get: invalid LSN: %lu/%lu", "%lu %lu"),
			    (u_long)alsn->file, (u_long)alsn->offset);
			return (EINVAL);
		}
		break;
	default:
		return (__db_ferr(env, "DB_LOGC->get", 1));
	}

	ENV_ENTER(env, ip);
	REPLICATION_WRAP(env, (__logc_get(logc, alsn, dbt, flags)), 0, ret);
	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __logc_get --
 *	DB_LOGC->get.
 *
 *	Fetch a record through __logc_get_int().  When a sequential walk
 *	(DB_FIRST, DB_NEXT, DB_LAST, DB_PREV) lands on a record at offset 0,
 *	i.e. a log file's persistent header, its version is cached on the
 *	cursor and the operation is repeated so the caller never sees the
 *	header record.  If a call to __logc_get_int() fails, *alsn is
 *	restored to the value it had on entry.
 *
 * PUBLIC: int __logc_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
 */
int
__logc_get(logc, alsn, dbt, flags)
	DB_LOGC *logc;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	DB_LSN saved_lsn;
	ENV *env;
	LOGP *persist;
	int ret;

	env = logc->env;

	/*
	 * On error, we take care not to overwrite the caller's LSN.  This
	 * is because callers looking for the end of the log loop using the
	 * DB_NEXT flag, and expect to take the last successful lsn out of
	 * the passed-in structure after DB_LOGC->get fails with DB_NOTFOUND.
	 *
	 * !!!
	 * This line is often flagged an uninitialized memory read during a
	 * Purify or similar tool run, as the application didn't initialize
	 * *alsn.  If the application isn't setting the DB_SET flag, there is
	 * no reason it should have initialized *alsn, but we can't know that
	 * and we want to make sure we never overwrite whatever the application
	 * put in there.
	 */
	saved_lsn = *alsn;
	/*
	 * If we get one of the log's header records as a result of doing a
	 * DB_FIRST, DB_NEXT, DB_LAST or DB_PREV, repeat the operation, log
	 * file header records aren't useful to applications.
	 */
	if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) {
		*alsn = saved_lsn;
		return (ret);
	}
	/*
	 * The DBT was populated by the call to __logc_get_int, copy the data
	 * out of DB_DBT_USERMEM space if it is there.
	 */
	if ((ret = __dbt_usercopy(env, dbt)) != 0)
		return (ret);

	if (alsn->offset == 0 && (flags == DB_FIRST ||
	    flags == DB_NEXT || flags == DB_LAST || flags == DB_PREV)) {
		/* Continue in the same direction for the repeated call. */
		switch (flags) {
		case DB_FIRST:
			flags = DB_NEXT;
			break;
		case DB_LAST:
			flags = DB_PREV;
			break;
		case DB_NEXT:
		case DB_PREV:
		default:
			break;
		}
		/*
		 * If we're walking the log and we find a persist header
		 * then store so that we may use it later if needed.
		 */
		persist = (LOGP *)dbt->data;
		if (LOG_SWAPPED(env))
			__log_persistswap(persist);
		logc->p_lsn = *alsn;
		logc->p_version = persist->version;
		/* Release the header copy before fetching the real record. */
		if (F_ISSET(dbt, DB_DBT_MALLOC)) {
			__os_free(env, dbt->data);
			dbt->data = NULL;
		}
		if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) {
			*alsn = saved_lsn;
			goto err;
		}
	}

err:	__dbt_userfree(env, dbt, NULL, NULL);
	return (ret);
}

/*
 * __logc_get_int --
 *	Get a log record; internal version.
 *
 *	Computes the target LSN from the cursor position and flags, then
 *	looks for the record in the cursor's buffer, the log region's buffer
 *	and finally on disk.  When HAVE_LOG_CHECKSUM is defined the record is
 *	checksummed unless it was read entirely from the region buffer.  The
 *	record is copied (and decrypted, if crypto is on) into dbt; on success
 *	*alsn and the cursor's lsn/len/prev fields are updated.
 *
 *	The log region lock is taken only where needed (tracked in rlock) and
 *	is released before returning if it was acquired here.
 */
static int
__logc_get_int(logc, alsn, dbt, flags)
	DB_LOGC *logc;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	DB_CIPHER *db_cipher;
	DB_LOG *dblp;
	DB_LSN last_lsn, nlsn;
	ENV *env;
	HDR hdr;
	LOG *lp;
	RLOCK rlock;
	logfile_validity status;
	u_int32_t cnt, logfsz, orig_flags;
	u_int8_t *rp;
	int eof, is_hmac, need_cksum, ret;
	size_t blen;
#ifdef HAVE_LOG_CHECKSUM
	u_int32_t i, logtype, version;
	char chksumbuf[256];
	u_int8_t ch;
#endif

	env = logc->env;
	db_cipher = env->crypto_handle;
	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	eof = is_hmac = 0;
	orig_flags = flags;	/* flags may be altered later. */
	blen = 0;
	logfsz = lp->persist.log_size;

	/*
	 * We don't acquire the log region lock until we need it, and we
	 * release it as soon as we're done.
	 */
	rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE;

#ifdef HAVE_LOG_CHECKSUM
nextrec:
#endif
	nlsn = logc->lsn;
	switch (flags) {
	case DB_NEXT:				/* Next log record. */
		if (!IS_ZERO_LSN(nlsn)) {
			/* Increment the cursor by the cursor record size. */
			nlsn.offset += logc->len;
			break;
		}
		flags = DB_FIRST;
		/* FALLTHROUGH */
	case DB_FIRST:				/* First log record. */
		/* Find the first log file. */
		if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
			goto err;

		/*
		 * DB_LV_INCOMPLETE:
		 *	Theoretically, the log file we want could be created
		 *	but not yet written, the "first" log record must be
		 *	in the log buffer.
		 * DB_LV_NORMAL:
		 * DB_LV_OLD_READABLE:
		 *	We found a log file we can read.
		 * DB_LV_NONEXISTENT:
		 *	No log files exist, the "first" log record must be in
		 *	the log buffer.
		 * DB_LV_OLD_UNREADABLE:
		 *	No readable log files exist, we're at the cross-over
		 *	point between two versions.  The "first" log record
		 *	must be in the log buffer.
		 */
		switch (status) {
		case DB_LV_INCOMPLETE:
			DB_ASSERT(env, lp->lsn.file == cnt);
			/* FALLTHROUGH */
		case DB_LV_NORMAL:
		case DB_LV_OLD_READABLE:
			nlsn.file = cnt;
			break;
		case DB_LV_NONEXISTENT:
			nlsn.file = 1;
			DB_ASSERT(env, lp->lsn.file == nlsn.file);
			break;
		case DB_LV_OLD_UNREADABLE:
			nlsn.file = cnt + 1;
			DB_ASSERT(env, lp->lsn.file == nlsn.file);
			break;
		}
		nlsn.offset = 0;
		break;
	case DB_CURRENT:			/* Current log record. */
		break;
	case DB_PREV:				/* Previous log record. */
		if (!IS_ZERO_LSN(nlsn)) {
			/* If at start-of-file, move to the previous file. */
			if (nlsn.offset == 0) {
				if (nlsn.file == 1) {
					ret = DB_NOTFOUND;
					goto err;
				}
				/*
				 * For on-disk logs the previous file must
				 * exist and be readable.
				 */
				if ((!lp->db_log_inmemory &&
				    (__log_valid(dblp, nlsn.file - 1, 0, NULL,
				    0, &status, NULL) != 0 ||
				    (status != DB_LV_NORMAL &&
				    status != DB_LV_OLD_READABLE)))) {
					ret = DB_NOTFOUND;
					goto err;
				}

				--nlsn.file;
			}
			nlsn.offset = logc->prev;
			break;
		}
		/* FALLTHROUGH */
	case DB_LAST:				/* Last log record. */
		/* lp->lsn and lp->len are read under the region lock. */
		if (rlock == L_NONE) {
			rlock = L_ACQUIRED;
			LOG_SYSTEM_LOCK(env);
		}
		nlsn.file = lp->lsn.file;
		nlsn.offset = lp->lsn.offset - lp->len;
		break;
	case DB_SET:				/* Set log record. */
		nlsn = *alsn;
		break;
	default:
		ret = __db_unknown_path(env, "__logc_get_int");
		goto err;
	}

	/* Entered only by the goto from the end-of-file handling below. */
	if (0) {				/* Move to the next file. */
next_file:	++nlsn.file;
		nlsn.offset = 0;
	}

	/*
	 * The above switch statement should have set nlsn to the lsn of
	 * the requested record.
	 */

	if (CRYPTO_ON(env)) {
		hdr.size = HDR_CRYPTO_SZ;
		is_hmac = 1;
	} else {
		hdr.size = HDR_NORMAL_SZ;
		is_hmac = 0;
	}

	/*
	 * Check to see if the record is in the cursor's buffer -- if so,
	 * we'll need to checksum it.
	 */
	if ((ret = __logc_incursor(logc, &nlsn, &hdr, &rp)) != 0)
		goto err;
	if (rp != NULL)
		goto cksum;

	/*
	 * Look to see if we're moving backward in the log with the last record
	 * coming from the disk -- it means the record can't be in the region's
	 * buffer.  Else, check the region's buffer.
	 *
	 * If the record isn't in the region's buffer, then either logs are
	 * in-memory, and we're done, or we're going to have to read the
	 * record from disk.  We want to make a point of not reading past the
	 * end of the logical log (after recovery, there may be data after the
	 * end of the logical log, not to mention the log file may have been
	 * pre-allocated).  So, zero out last_lsn, and initialize it inside
	 * __logc_inregion -- if it's still zero when we check it in
	 * __logc_ondisk, that's OK, it just means the logical end of the log
	 * isn't an issue for this request.
	 */
	ZERO_LSN(last_lsn);
	if (!F_ISSET(logc, DB_LOG_DISK) ||
	    LOG_COMPARE(&nlsn, &logc->lsn) > 0) {
		F_CLR(logc, DB_LOG_DISK);

		if ((ret = __logc_inregion(logc,
		    &nlsn, &rlock, &last_lsn, &hdr, &rp, &need_cksum)) != 0)
			goto err;
		if (rp != NULL) {
			/*
			 * If we read the entire record from the in-memory log
			 * buffer, we don't need to checksum it, nor do we need
			 * to worry about vtruncate issues.
			 */
			if (need_cksum)
				goto cksum;
			goto from_memory;
		}
		if (lp->db_log_inmemory)
			goto nohdr;
	}

	/*
	 * We have to read from an on-disk file to retrieve the record.
	 * If we ever can't retrieve the record at offset 0, we're done,
	 * return EOF/DB_NOTFOUND.
	 *
	 * Discard the region lock if we're still holding it, the on-disk
	 * reading routines don't need it.
	 */
	if (rlock == L_ACQUIRED) {
		rlock = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}
	if ((ret = __logc_ondisk(
	    logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0)
		goto err;

	/*
	 * If we got a 0-length record, that means we're in the midst of some
	 * bytes that got 0'd as the result of a vtruncate.  In that case or at
	 * the end of a file, with DB_NEXT we're going to have to retry.
	 */
	if (eof || hdr.len == 0) {
nohdr:		switch (flags) {
		case DB_LAST:
		case DB_PREV:
			/*
			 * We should never get here.  If we recover a log
			 * file with 0's at the end, we'll treat the 0'd
			 * headers as the end of log and ignore them.  If
			 * we're reading backwards from another file, then
			 * the first record in that new file should have its
			 * prev field set correctly.
			 * First check that the file exists.
			 */
			if (eof && logc->bp_lsn.file != nlsn.file)
				__db_errx(env, DB_STR_A("2583",
	    "Log file %d not found, check log directory configuration", "%d"),
				    nlsn.file);
			else
				__db_errx(env, DB_STR("2576",
		"Encountered zero length records while traversing backwards"));
			ret = __env_panic(env, DB_RUNRECOVERY);
			goto err;
		case DB_FIRST:
		case DB_NEXT:
			/*
			 * Zero'd records always indicate the end of a file,
			 * but only go to the next file once.
			 */
			if (nlsn.offset != 0)
				goto next_file;
			/* FALLTHROUGH */
		case DB_SET:
		default:
			ret = DB_NOTFOUND;
			goto err;
		}
	}

	/* Remember that the most recent record came from disk. */
	F_SET(logc, DB_LOG_DISK);

cksum:	/*
	 * Discard the region lock if we're still holding it.  (The path to
	 * get here is we acquired the region lock because of the caller's
	 * flag argument, but we found the record in the in-memory or cursor
	 * buffers.  Improbable, but it's easy to avoid.)
	 */
	if (rlock == L_ACQUIRED) {
		rlock = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}
#ifdef HAVE_LOG_CHECKSUM
	/*
	 * Checksum: there are two types of errors -- a configuration error
	 * or a checksum mismatch.  The former is always bad.  The latter is
	 * OK if we're searching for the end of the log, and very, very bad
	 * if we're reading random log records.
	 */
	if ((ret = __db_check_chksum(env, &hdr, db_cipher,
	    hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) {
		/*
		 * This might be a log whose checksum does not include the hdr.
		 * Try again without the header, either for logs whose version
		 * is pre-DB_LOGCHKSUM, or for the persist record which contains
		 * the log version.  Check for the zero offset first to avoid
		 * unwanted recursion in __logc_version().
		 *
		 * Set the cursor to the LSN we are trying to look at.
		 */
		/* last_lsn is reused here to hold the cursor's previous LSN. */
		last_lsn = logc->lsn;
		logc->lsn = nlsn;
		if ((logc->lsn.offset == 0 ||
		    (__logc_version(logc, &version) == 0 &&
		    version < DB_LOGCHKSUM)) &&
		    __db_check_chksum(env, NULL, db_cipher, hdr.chksum,
		    rp + hdr.size, hdr.len - hdr.size, is_hmac) == 0) {
			logc->lsn = last_lsn;
			goto from_memory;
		}

		/*
		 * If we are iterating logs during log verification and basic
		 * header info is correct, we can skip the failed log record
		 * and goto next one.
		 */
		if (F_ISSET(logc->env->lg_handle, DBLOG_VERIFYING) &&
		    (orig_flags == DB_FIRST || orig_flags == DB_LAST ||
		    orig_flags == DB_PREV || orig_flags == DB_NEXT) &&
		    hdr.size > 0 && hdr.len > hdr.size && hdr.len < logfsz &&
		    (((flags == DB_FIRST || flags == DB_NEXT) &&
		    hdr.prev == last_lsn.offset) ||
		    ((flags == DB_PREV || flags == DB_LAST) &&
		    last_lsn.offset - hdr.len == nlsn.offset))) {

			flags = orig_flags;

			/* Step the cursor over the bad record. */
			logc->lsn = nlsn;
			logc->len = hdr.len;
			logc->prev = hdr.prev;

			if (flags == DB_LAST)
				flags = DB_PREV;
			else if (flags == DB_FIRST)
				flags = DB_NEXT;

			/* Render the stored checksum bytes for the message. */
			memset(chksumbuf, 0, 256);
			blen = 0;
			for (i = 0; i < DB_MAC_KEY && blen < 256; i++) {
				ch = hdr.chksum[i];
				blen = strlen(chksumbuf);
				snprintf(chksumbuf + blen, 255 - blen,
				    isprint(ch) ||
				    ch == 0x0a ? "%c" : "%#x ", ch);
			}
			/* Type field is always the first one in the record. */
			memcpy(&logtype, rp + hdr.size, sizeof(logtype));
			__db_errx(env, DB_STR_A("2577",
			    "DB_LOGC->get: log record LSN %lu/%lu: "
			    "checksum mismatch, hdr.chksum: %s, hdr.prev: %u, "
			    "hdr.len: %u, log type: %u. Skipping it and "
			    "continuing with the %s one",
			    "%lu %lu %s %u %u %u %s"),
			    (u_long)nlsn.file, (u_long)nlsn.offset, chksumbuf,
			    hdr.prev, hdr.len, logtype, flags == DB_NEXT ?
			    DB_STR_P("next") : DB_STR_P("previous"));
			goto nextrec;
		}

		/* A return of -1 is treated as a checksum mismatch. */
		if (F_ISSET(logc, DB_LOG_SILENT_ERR)) {
			if (ret == -1)
				ret = EIO;
		} else if (ret == -1) {
			__db_errx(env, DB_STR_A("2578",
		    "DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch",
			    "%lu %lu"), (u_long)nlsn.file, (u_long)nlsn.offset);
			__db_errx(env, DB_STR("2579",
		    "DB_LOGC->get: catastrophic recovery may be required"));
			ret = __env_panic(env, DB_RUNRECOVERY);
		}
		/* Put the cursor back where it was before failing. */
		logc->lsn = last_lsn;
		goto err;
	}
#endif

from_memory:
	/*
	 * Discard the region lock if we're still holding it.  (The path to
	 * get here is we acquired the region lock because of the caller's
	 * flag argument, but we found the record in the in-memory or cursor
	 * buffers.  Improbable, but it's easy to avoid.)
	 */
	if (rlock == L_ACQUIRED) {
		rlock = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}

	/* Copy the record into the user's DBT. */
	if ((ret = __db_retcopy(env, dbt, rp + hdr.size,
	    (u_int32_t)(hdr.len - hdr.size),
	    &logc->dbt.data, &logc->dbt.ulen)) != 0)
		goto err;

	if (CRYPTO_ON(env)) {
		/* Any decryption failure is reported to the caller as EAGAIN. */
		if ((ret = db_cipher->decrypt(env, db_cipher->data,
		    hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) {
			ret = EAGAIN;
			goto err;
		}
		/*
		 * Return the original log record size to the user,
		 * even though we've allocated more than that, possibly.
		 * The log record is decrypted in the user dbt, not in
		 * the buffer, so we must do this here after decryption,
		 * not adjust the len passed to the __db_retcopy call.
		 */
		dbt->size = hdr.orig_size;
	}

	/* Update the cursor and the returned LSN. */
	*alsn = nlsn;
	logc->lsn = nlsn;
	logc->len = hdr.len;
	logc->prev = hdr.prev;

err:	if (rlock == L_ACQUIRED)
		LOG_SYSTEM_UNLOCK(env);

	return (ret);
}

/*
 * __logc_incursor --
 *	Check to see if the requested record is in the cursor's buffer.
 *
 *	On return *pp points at the record inside the cursor's buffer if the
 *	whole record is there, and is NULL otherwise.  hdr->size must be set
 *	by the caller; the rest of *hdr is filled in when the header is
 *	available in the buffer.  Returns DB_NOTFOUND if the header found at
 *	the LSN fails __logc_hdrchk(), else 0.
 */
static int
__logc_incursor(logc, lsn, hdr, pp)
	DB_LOGC *logc;
	DB_LSN *lsn;
	HDR *hdr;
	u_int8_t **pp;
{
	ENV *env;
	u_int8_t *p;
	int eof;

	env = logc->env;
	*pp = NULL;

	/*
	 * Test to see if the requested LSN could be part of the cursor's
	 * buffer.
	 *
	 * The record must be part of the same file as the cursor's buffer.
	 * The record must start at a byte offset equal to or greater than
	 * the cursor buffer.
	 * The record must not start at a byte offset after the cursor
	 * buffer's end.
	 */
	if (logc->bp_lsn.file != lsn->file)
		return (0);
	if (logc->bp_lsn.offset > lsn->offset)
		return (0);
	if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size)
		return (0);

	/*
	 * Read the record's header and check if the record is entirely held
	 * in the buffer.  If the record is not entirely held, get it again.
	 * (The only advantage in having part of the record locally is that
	 * we might avoid a system call because we already have the HDR in
	 * memory.)
	 *
	 * If the header check fails for any reason, it must be because the
	 * LSN is bogus.  Fail hard.
	 */
	p = logc->bp + (lsn->offset - logc->bp_lsn.offset);
	memcpy(hdr, p, hdr->size);
	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));
	if (__logc_hdrchk(logc, lsn, hdr, &eof))
		return (DB_NOTFOUND);
	if (eof || logc->bp_lsn.offset + logc->bp_rlen < lsn->offset + hdr->len)
		return (0);

	*pp = p;				/* Success. */

	return (0);
}

/*
 * __logc_inregion --
 *	Check to see if the requested record is in the region's buffer.
 *
 *	Acquires the log region lock if *rlockp is L_NONE.  On return:
 *	  *last_lsn	the log's current end LSN, limited to the written
 *			offset for on-disk logs (not set if the function
 *			returns before computing it);
 *	  *pp		the record within the cursor's buffer, or NULL if the
 *			record was not found in the region;
 *	  *need_cksump	non-zero if part of the record was read from disk.
 *	Returns DB_NOTFOUND for an LSN at or past the end of the log or for a
 *	header that fails its sanity checks.  The region lock is released on
 *	the partial-record path before the disk read.
 */
static int
__logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
	DB_LOGC *logc;
	DB_LSN *lsn, *last_lsn;
	RLOCK *rlockp;
	HDR *hdr;
	u_int8_t **pp;
	int *need_cksump;
{
	DB_LOG *dblp;
	ENV *env;
	LOG *lp;
	size_t b_region, len, nr;
	u_int32_t b_disk;
	int eof, ret;
	u_int8_t *p;

	env = logc->env;
	dblp = env->lg_handle;
	lp = env->lg_handle->reginfo.primary;

	ret = 0;
	b_region = 0;
	*pp = NULL;
	*need_cksump = 0;

	/* If we haven't yet acquired the log region lock, do so. */
	if (*rlockp == L_NONE) {
		*rlockp = L_ACQUIRED;
		LOG_SYSTEM_LOCK(env);
	}

	/*
	 * The routines to read from disk must avoid reading past the logical
	 * end of the log, so pass that information back to it.
	 *
	 * Since they're reading directly from the disk, they must also avoid
	 * reading past the offset we've written out.  If the log was
	 * truncated, it's possible that there are zeroes or garbage on
	 * disk after this offset, and the logical end of the log can
	 * come later than this point if the log buffer isn't empty.
	 */
	*last_lsn = lp->lsn;
	if (!lp->db_log_inmemory && last_lsn->offset > lp->w_off)
		last_lsn->offset = lp->w_off;

	/*
	 * Test to see if the requested LSN could be part of the region's
	 * buffer.
	 *
	 * During recovery, we read the log files getting the information to
	 * initialize the region.  In that case, the region's lsn field will
	 * not yet have been filled in, use only the disk.
	 *
	 * The record must not start at a byte offset after the region buffer's
	 * end, since that means the request is for a record after the end of
	 * the log.  Do this test even if the region's buffer is empty -- after
	 * recovery, the log files may continue past the declared end-of-log,
	 * and the disk reading routine will incorrectly attempt to read the
	 * remainder of the log.
	 *
	 * Otherwise, test to see if the region's buffer actually has what we
	 * want:
	 *
	 * The buffer must have some useful content.
	 * The record must be in the same file as the region's buffer and must
	 * start at a byte offset equal to or greater than the region's buffer.
	 */
	if (IS_ZERO_LSN(lp->lsn))
		return (0);
	if (LOG_COMPARE(lsn, &lp->lsn) >= 0)
		return (DB_NOTFOUND);
	else if (lp->db_log_inmemory) {
		if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0)
			return (ret);
	} else if (lp->b_off == 0 || LOG_COMPARE(lsn, &lp->f_lsn) < 0)
		return (0);

	/*
	 * The current contents of the cursor's buffer will be useless for a
	 * future call, we're about to overwrite it -- trash it rather than
	 * try and make it look correct.
	 */
	logc->bp_rlen = 0;

	/*
	 * If the requested LSN is greater than the region buffer's first
	 * byte, we know the entire record is in the buffer on a good LSN.
	 *
	 * If we're given a bad LSN, the "entire" record might not be in
	 * our buffer in order to fail at the chksum.  __logc_hdrchk made
	 * sure our dest buffer fits, via bp_maxrec, but we also need to
	 * make sure we don't run off the end of this buffer, the src.
	 *
	 * There is one case where the header check can fail: on a scan through
	 * in-memory logs, when we reach the end of a file we can read an empty
	 * header.  In that case, it's safe to return zero, here: it will be
	 * caught in our caller.  Otherwise, the LSN is bogus.  Fail hard.
	 */
	if (lp->db_log_inmemory || LOG_COMPARE(lsn, &lp->f_lsn) > 0) {
		if (!lp->db_log_inmemory)
			b_region = lsn->offset - lp->w_off;
		__log_inmem_copyout(dblp, b_region, hdr, hdr->size);
		if (LOG_SWAPPED(env))
			__log_hdrswap(hdr, CRYPTO_ON(env));
		if (__logc_hdrchk(logc, lsn, hdr, &eof) != 0)
			return (DB_NOTFOUND);
		if (eof)
			return (0);
		/* The whole record must lie inside the source buffer. */
		if (lp->db_log_inmemory) {
			if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len)
				return (DB_NOTFOUND);
		} else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
			return (DB_NOTFOUND);
		/* Grow the cursor's buffer if the record does not fit. */
		if (logc->bp_size <= hdr->len) {
			len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
			if ((ret =
			    __os_realloc(logc->env, len, &logc->bp)) != 0)
				 return (ret);
			logc->bp_size = (u_int32_t)len;
		}
		__log_inmem_copyout(dblp, b_region, logc->bp, hdr->len);
		*pp = logc->bp;
		return (0);
	}

	DB_ASSERT(env, !lp->db_log_inmemory);

	/*
	 * There's a partial record, that is, the requested record starts
	 * in a log file and finishes in the region buffer.  We have to
	 * find out how many bytes of the record are in the region buffer
	 * so we can copy them out into the cursor buffer.  First, check
	 * to see if the requested record is the only record in the region
	 * buffer, in which case we should copy the entire region buffer.
	 *
	 * Else, walk back through the region's buffer to find the first LSN
	 * after the record that crosses the buffer boundary -- we can detect
	 * that LSN, because its "prev" field will reference the record we
	 * want.  The bytes we need to copy from the region buffer are the
	 * bytes up to the record we find.  The bytes we'll need to allocate
	 * to hold the log record are the bytes between the two offsets.
	 */
	b_disk = lp->w_off - lsn->offset;
	if (lp->b_off <= lp->len)
		b_region = (u_int32_t)lp->b_off;
	else
		for (p = dblp->bufp + (lp->b_off - lp->len);;) {
			memcpy(hdr, p, hdr->size);
			if (LOG_SWAPPED(env))
				__log_hdrswap(hdr, CRYPTO_ON(env));
			if (hdr->prev == lsn->offset) {
				b_region = (u_int32_t)(p - dblp->bufp);
				break;
			}
			/* Step back to the previous record in the buffer. */
			p = dblp->bufp + (hdr->prev - lp->w_off);
		}

	/*
	 * If we don't have enough room for the record, we have to allocate
	 * space.  We have to do it while holding the region lock, which is
	 * truly annoying, but there's no way around it.  This call is why
	 * we allocate cursor buffer space when allocating the cursor instead
	 * of waiting.
	 */
	if (logc->bp_size <= b_region + b_disk) {
		len = (size_t)DB_ALIGN((uintmax_t)(b_region + b_disk) * 2, 128);
		if ((ret = __os_realloc(logc->env, len, &logc->bp)) != 0)
			return (ret);
		logc->bp_size = (u_int32_t)len;
	}

	/* Copy the region's bytes to the end of the cursor's buffer. */
	p = (logc->bp + logc->bp_size) - b_region;
	memcpy(p, dblp->bufp, b_region);

	/* Release the region lock. */
	if (*rlockp == L_ACQUIRED) {
		*rlockp = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}

	/*
	 * Read the rest of the information from disk.  Neither short reads
	 * or EOF are acceptable, the bytes we want had better be there.
	 */
	if (b_disk != 0) {
		/* The disk bytes go immediately before the region bytes. */
		p -= b_disk;
		nr = b_disk;
		if ((ret = __logc_io(
		    logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0)
			return (ret);
		if (nr < b_disk)
			return (__logc_shortread(logc, lsn, 0));

		/* We read bytes from the disk, we'll need to checksum them. */
		*need_cksump = 1;
	}

	/* Copy the header information into the caller's structure. */
	memcpy(hdr, p, hdr->size);
	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));

	*pp = p;
	return (0);
}

/*
 * __log_hdrswap --
 *	Swap the bytes in a log header from machines with different endianness.
 *
 *	Swaps hdr->prev and hdr->len in place.  The checksum field is swapped
 *	only when is_hmac is zero.
 *
 * PUBLIC: void __log_hdrswap __P((HDR *, int));
 */
void
__log_hdrswap(hdr, is_hmac)
	HDR *hdr;
	int is_hmac;
{
	M_32_SWAP(hdr->prev);
	M_32_SWAP(hdr->len);
	if (!is_hmac)
		P_32_SWAP(hdr->chksum);
}

/*
 * __log_persistswap --
 *	Swap the bytes in a log file persistent header from machines with
 *	different endianness.
 *
 *	Swaps the magic, version, log_size and notused fields in place.
 *
 * PUBLIC: void __log_persistswap __P((LOGP *));
 */
void
__log_persistswap(persist)
	LOGP *persist;
{
	M_32_SWAP(persist->magic);
	M_32_SWAP(persist->version);
	M_32_SWAP(persist->log_size);
	M_32_SWAP(persist->notused);
}

/*
 * __logc_ondisk --
 *	Read a record off disk.
 *
 *	Reads and checks the header at lsn, then reads a buffer's worth of
 *	the file around the record into the cursor's buffer.  On success
 *	*pp points at the record within that buffer.  *eofp is set to 1,
 *	with a return of 0, when end-of-file is reached, the header could
 *	only be partially read, or the header check reports EOF; *pp is
 *	not set in those cases.  hdr->size must be set by the caller.
 */
static int
__logc_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp)
	DB_LOGC *logc;
	DB_LSN *lsn, *last_lsn;
	u_int32_t flags;
	int *eofp;
	HDR *hdr;
	u_int8_t **pp;
{
	ENV *env;
	size_t len, nr;
	u_int32_t offset;
	int ret;

	env = logc->env;
	*eofp = 0;

	/* Read just the header first, to learn the record's length. */
	nr = hdr->size;
	if ((ret =
	    __logc_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0)
		return (ret);
	if (*eofp)
		return (0);

	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));

	/*
	 * If the read was successful, but we can't read a full header, assume
	 * we've hit EOF.  We can't check that the header has been partially
	 * zeroed out, but it's unlikely that this is caused by a write failure
	 * since the header is written as a single write call and it's less
	 * than sector.
	 */
	if (nr < hdr->size) {
		*eofp = 1;
		return (0);
	}

	/* Check the HDR. */
	if ((ret = __logc_hdrchk(logc, lsn, hdr, eofp)) != 0)
		return (ret);
	if (*eofp)
		return (0);

	/*
	 * Regardless of how we return, the previous contents of the cursor's
	 * buffer are useless -- trash it.
	 */
	logc->bp_rlen = 0;

	/*
	 * Otherwise, we now (finally!) know how big the record is.  (Maybe
	 * we should have just stuck the length of the record into the LSN!?)
	 * Make sure we have enough space.
	 */
	if (logc->bp_size <= hdr->len) {
		len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
		if ((ret = __os_realloc(env, len, &logc->bp)) != 0)
			return (ret);
		logc->bp_size = (u_int32_t)len;
	}

	/*
	 * If we're moving forward in the log file, read this record in at the
	 * beginning of the buffer.  Otherwise, read this record in at the end
	 * of the buffer, making sure we don't try and read before the start
	 * of the file.  (We prefer positioning at the end because transaction
	 * aborts use DB_SET to move backward through the log and we might get
	 * lucky.)
	 *
	 * Read a buffer's worth, without reading past the logical EOF.  The
	 * last_lsn may be a zero LSN, but that's OK, the test works anyway.
	 */
	if (flags == DB_FIRST || flags == DB_NEXT)
		offset = lsn->offset;
	else if (lsn->offset + hdr->len < logc->bp_size)
		offset = 0;
	else
		offset = (lsn->offset + hdr->len) - logc->bp_size;

	nr = logc->bp_size;
	if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset)
		nr = last_lsn->offset - offset;

	if ((ret =
	    __logc_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0)
		return (ret);

	/*
	 * We should have at least gotten the bytes up-to-and-including the
	 * record we're reading.
	 */
	if (nr < (lsn->offset + hdr->len) - offset)
		return (__logc_shortread(logc, lsn, 1));

	/*
	 * Set up the return information.
	 *
	 * !!!
	 * No need to set the bp_lsn.file field, __logc_io set it for us.
	 */
	logc->bp_rlen = (u_int32_t)nr;
	logc->bp_lsn.offset = offset;

	*pp = logc->bp + (lsn->offset - offset);

	return (0);
}

/*
 * __logc_hdrchk --
 *
 * Check for corrupted HDRs before we use them to allocate memory or find
 * records.
 *
 * If the log files were pre-allocated, a zero-filled HDR structure is the
 * logical file end.
However, we can see buffers filled with 0's during + * recovery, too (because multiple log buffers were written asynchronously, + * and one made it to disk before a different one that logically precedes + * it in the log file. + * + * Check for impossibly large records. The malloc should fail later, but we + * have customers that run mallocs that treat all allocation failures as fatal + * errors. + * + * Note that none of this is necessarily something awful happening. We let + * the application hand us any LSN they want, and it could be a pointer into + * the middle of a log record, there's no way to tell. + */ +static int +__logc_hdrchk(logc, lsn, hdr, eofp) + DB_LOGC *logc; + DB_LSN *lsn; + HDR *hdr; + int *eofp; +{ + ENV *env; + int ret; + + env = logc->env; + + /* + * Check EOF before we do any other processing. + */ + if (eofp != NULL) { + if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) { + *eofp = 1; + return (0); + } + *eofp = 0; + } + + /* + * Sanity check the log record's size. + * We must check it after "virtual" EOF above. + */ + if (hdr->len <= hdr->size) + goto err; + + /* + * If the cursor's max-record value isn't yet set, it means we aren't + * reading these records from a log file and no check is necessary. + */ + if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) { + /* + * If we fail the check, there's the pathological case that + * we're reading the last file, it's growing, and our initial + * check information was wrong. Get it again, to be sure. + */ + if ((ret = __logc_set_maxrec(logc, NULL)) != 0) { + __db_err(env, ret, "DB_LOGC->get"); + return (ret); + } + if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) + goto err; + } + return (0); + +err: if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) + __db_errx(env, DB_STR_A("2580", + "DB_LOGC->get: LSN %lu/%lu: invalid log record header", + "%lu %lu"), (u_long)lsn->file, (u_long)lsn->offset); + return (EIO); +} + +/* + * __logc_io -- + * Read records from a log file. 
+ */ +static int +__logc_io(logc, fnum, offset, p, nrp, eofp) + DB_LOGC *logc; + u_int32_t fnum, offset; + void *p; + size_t *nrp; + int *eofp; +{ + DB_LOG *dblp; + ENV *env; + LOG *lp; + int ret; + char *np; + + env = logc->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* + * If we've switched files, discard the current file handle and acquire + * a new one. + */ + if (logc->fhp != NULL && logc->bp_lsn.file != fnum) { + ret = __os_closehandle(env, logc->fhp); + logc->fhp = NULL; + logc->bp_lsn.file = 0; + + if (ret != 0) + return (ret); + } + if (logc->fhp == NULL) { + if ((ret = __log_name(dblp, fnum, + &np, &logc->fhp, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { + /* + * If we're allowed to return EOF, assume that's the + * problem, set the EOF status flag and return 0. + */ + if (eofp != NULL) { + *eofp = 1; + ret = 0; + } else if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) + __db_err(env, ret, "DB_LOGC->get: %s", + np == NULL ? "__log_name failed" : np); + __os_free(env, np); + return (ret); + } + + if ((ret = __logc_set_maxrec(logc, np)) != 0) { + __db_err(env, ret, "DB_LOGC->get: %s", np); + __os_free(env, np); + return (ret); + } + __os_free(env, np); + + logc->bp_lsn.file = fnum; + } + + STAT_INC(env, log, read, lp->stat.st_rcount, fnum); + /* Seek to the record's offset and read the data. */ + if ((ret = __os_io(env, DB_IO_READ, + logc->fhp, 0, 0, offset, (u_int32_t)*nrp, p, nrp)) != 0) { + if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) + __db_err(env, ret, DB_STR_A("2581", + "DB_LOGC->get: LSN: %lu/%lu: read", "%lu %lu"), + (u_long)fnum, (u_long)offset); + return (ret); + } + + return (0); +} + +/* + * __logc_shortread -- + * Read was short -- return a consistent error message and error. 
+ */ +static int +__logc_shortread(logc, lsn, check_silent) + DB_LOGC *logc; + DB_LSN *lsn; + int check_silent; +{ + if (!check_silent || !F_ISSET(logc, DB_LOG_SILENT_ERR)) + __db_errx(logc->env, DB_STR_A("2582", + "DB_LOGC->get: LSN: %lu/%lu: short read", "%lu %lu"), + (u_long)lsn->file, (u_long)lsn->offset); + return (EIO); +} + +/* + * __logc_set_maxrec -- + * Bound the maximum log record size in a log file. + */ +static int +__logc_set_maxrec(logc, np) + DB_LOGC *logc; + char *np; +{ + DB_LOG *dblp; + ENV *env; + LOG *lp; + u_int32_t mbytes, bytes; + int ret; + + env = logc->env; + dblp = env->lg_handle; + + /* + * We don't want to try and allocate huge chunks of memory because + * applications with error-checking malloc's often consider that a + * hard failure. If we're about to look at a corrupted record with + * a bizarre size, we need to know before trying to allocate space + * to hold it. We could read the persistent data at the beginning + * of the file but that's hard -- we may have to decrypt it, checksum + * it and so on. Stat the file instead. + */ + if (logc->fhp != NULL) { + if ((ret = __os_ioinfo(env, np, logc->fhp, + &mbytes, &bytes, NULL)) != 0) + return (ret); + if (logc->bp_maxrec < (mbytes * MEGABYTE + bytes)) + logc->bp_maxrec = mbytes * MEGABYTE + bytes; + } + + /* + * If reading from the log file currently being written, we could get + * an incorrect size, that is, if the cursor was opened on the file + * when it had only a few hundred bytes, and then the cursor used to + * move forward in the file, after more log records were written, the + * original stat value would be wrong. Use the maximum of the current + * log file size and the size of the buffer -- that should represent + * the max of any log record currently in the file. + * + * The log buffer size is set when the environment is opened and never + * changed, we don't need a lock on it. 
+ */ + lp = dblp->reginfo.primary; + if (logc->bp_maxrec < lp->buffer_size) + logc->bp_maxrec = lp->buffer_size; + + return (0); +} + +/* + * PUBLIC: int __log_read_record_pp __P((DB_ENV *, DB **, void *, void *, + * PUBLIC: DB_LOG_RECSPEC *, u_int32_t, void **)); + */ +int +__log_read_record_pp(dbenv, dbpp, td, recbuf, spec, size, argpp) + DB_ENV *dbenv; + DB **dbpp; + void *td; + void *recbuf; + DB_LOG_RECSPEC *spec; + u_int32_t size; + void **argpp; +{ + DB_THREAD_INFO *ip; + int ret; + + ENV_REQUIRES_CONFIG(dbenv->env, + dbenv->env->lg_handle, "DB_ENV->log_read_record", DB_INIT_LOG); + + *argpp = NULL; + ENV_ENTER(dbenv->env, ip); + if ((ret = __os_umalloc(dbenv->env, size + sizeof(DB_TXN), argpp)) != 0) + goto done; + REPLICATION_WRAP(dbenv->env, (__log_read_record(dbenv->env, dbpp, + td, recbuf, spec, size, argpp)), 0, ret); + if (ret != 0) { + __os_ufree(dbenv->env, *argpp); + *argpp = NULL; + } +done: ENV_LEAVE(dbenv->env, ip); + return (ret); +} + +/* + * PUBLIC: int __log_read_record __P((ENV *, DB **, void *, void *, + * PUBLIC: DB_LOG_RECSPEC *, u_int32_t, void **)); + */ +int +__log_read_record(env, dbpp, td, recbuf, spec, size, argpp) + ENV *env; + DB **dbpp; + void *td; + void *recbuf; + DB_LOG_RECSPEC *spec; + u_int32_t size; + void **argpp; +{ + DB_LOG_RECSPEC *sp, *np; + DB_TXN *txnp; + LOG *lp; + PAGE *hdrstart; + u_int32_t hdrsize, op, uinttmp; + u_int8_t *ap, *bp; + int has_data, ret, downrev; + + COMPQUIET(has_data, 0); + COMPQUIET(hdrsize, 0); + COMPQUIET(hdrstart, NULL); + COMPQUIET(op, 0); + ap = *argpp; + /* + * Allocate space for the arg structure and a transaction + * structure which will imediately follow it. 
+ */ + if (ap == NULL && + (ret = __os_malloc(env, size + sizeof(DB_TXN), &ap)) != 0) + return (ret); + txnp = (DB_TXN *)(ap + size); + memset(txnp, 0, sizeof(DB_TXN)); + txnp->td = td; + lp = env->lg_handle->reginfo.primary; + downrev = lp->persist.version < DB_LOGVERSION_50; + + bp = recbuf; + + /* + * The first three fields are always the same in every arg + * struct so we know their offsets. + */ + /* type */ + LOGCOPY_32(env, ap + SSZ(LOG_REC_HEADER, type), bp); + bp += sizeof(u_int32_t); + + /* txnp */ + LOGCOPY_32(env, &txnp->txnid, bp); + *(DB_TXN **)(ap + SSZ(LOG_REC_HEADER, txnp)) = txnp; + bp += sizeof(txnp->txnid); + + /* Previous LSN */ + LOGCOPY_TOLSN(env, + (DB_LSN *)(ap + SSZ(LOG_REC_HEADER, prev_lsn)), bp); + bp += sizeof(DB_LSN); + + ret = 0; + for (sp = spec; sp->type != LOGREC_Done; sp++) { + switch (sp->type) { + case LOGREC_DB: + LOGCOPY_32(env, &uinttmp, bp); + *(u_int32_t*)(ap + sp->offset) = uinttmp; + bp += sizeof(uinttmp); + if (dbpp != NULL) { + *dbpp = NULL; + ret = __dbreg_id_to_db(env, + txnp, dbpp, (int32_t)uinttmp, 1); + } + break; + + case LOGREC_ARG: + case LOGREC_TIME: + case LOGREC_DBOP: + LOGCOPY_32(env, ap + sp->offset, bp); + bp += sizeof(uinttmp); + break; + case LOGREC_OP: + LOGCOPY_32(env, &op, bp); + *(u_int32_t *)(ap + sp->offset) = op; + bp += sizeof(uinttmp); + break; + case LOGREC_DBT: + case LOGREC_PGLIST: + case LOGREC_LOCKS: + case LOGREC_HDR: + case LOGREC_DATA: + case LOGREC_PGDBT: + case LOGREC_PGDDBT: + memset(ap + sp->offset, 0, sizeof(DBT)); + LOGCOPY_32(env, &uinttmp, bp); + *(u_int32_t*) + (ap + sp->offset + SSZ(DBT, size)) = uinttmp; + bp += sizeof(u_int32_t); + *(void **)(ap + sp->offset + SSZ(DBT, data)) = bp; + + /* Process fields that need to be byte swapped. 
*/ + switch (sp->type) { + case LOGREC_DBT: + case LOGREC_PGLIST: + case LOGREC_LOCKS: + break; + case LOGREC_HDR: + if (uinttmp == 0) + break; + has_data = 0; + for (np = sp + 1; np->type != LOGREC_Done; np++) + if (np->type == LOGREC_DATA) { + has_data = 1; + break; + } + hdrstart = (PAGE *)bp; + hdrsize = uinttmp; + if (has_data == 1) + break; + /* FALLTHROUGH */ + case LOGREC_DATA: + if (downrev ? LOG_SWAPPED(env) : + (dbpp != NULL && *dbpp != NULL && + F_ISSET(*dbpp, DB_AM_SWAP))) + __db_recordswap(op, hdrsize, + hdrstart, has_data ? + ap + sp->offset : NULL, 1); + break; + case LOGREC_PGDBT: + has_data = 0; + for (np = sp + 1; np->type != LOGREC_Done; np++) + if (np->type == LOGREC_PGDDBT) { + has_data = 1; + break; + } + + hdrstart = (PAGE *)bp; + hdrsize = uinttmp; + if (has_data == 1) + break; + /* FALLTHROUGH */ + case LOGREC_PGDDBT: + if (dbpp != NULL && *dbpp != NULL && + (downrev ? LOG_SWAPPED(env) : + F_ISSET(*dbpp, DB_AM_SWAP)) && + (ret = __db_pageswap(env, *dbpp, hdrstart, + hdrsize, has_data == 0 ? NULL : + (DBT *)(ap + sp->offset), 1)) != 0) + return (ret); + break; + default: + DB_ASSERT(env, sp->type != sp->type); + } + + bp += uinttmp; + break; + + case LOGREC_POINTER: + LOGCOPY_TOLSN(env, (DB_LSN *)(ap + sp->offset), bp); + bp += sizeof(DB_LSN); + break; + + default: + DB_ASSERT(env, sp->type != sp->type); + } + } + + *argpp = ap; + return (ret); +} diff --git a/src/log/log_method.c b/src/log/log_method.c new file mode 100644 index 00000000..d5aec116 --- /dev/null +++ b/src/log/log_method.c @@ -0,0 +1,533 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" + +/* + * __log_env_create -- + * Log specific initialization of the DB_ENV structure. 
+ * + * PUBLIC: int __log_env_create __P((DB_ENV *)); + */ +int +__log_env_create(dbenv) + DB_ENV *dbenv; +{ + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + */ + dbenv->lg_bsize = 0; + dbenv->lg_regionmax = 0; + + return (0); +} + +/* + * __log_env_destroy -- + * Log specific destruction of the DB_ENV structure. + * + * PUBLIC: void __log_env_destroy __P((DB_ENV *)); + */ +void +__log_env_destroy(dbenv) + DB_ENV *dbenv; +{ + COMPQUIET(dbenv, NULL); +} + +/* + * PUBLIC: int __log_get_lg_bsize __P((DB_ENV *, u_int32_t *)); + */ +int +__log_get_lg_bsize(dbenv, lg_bsizep) + DB_ENV *dbenv; + u_int32_t *lg_bsizep; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->get_lg_bsize", DB_INIT_LOG); + + if (LOGGING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lg_bsizep = + ((LOG *)env->lg_handle->reginfo.primary)->buffer_size; + } else + *lg_bsizep = dbenv->lg_bsize; + return (0); +} + +/* + * __log_set_lg_bsize -- + * DB_ENV->set_lg_bsize. 
+ * + * PUBLIC: int __log_set_lg_bsize __P((DB_ENV *, u_int32_t)); + */ +int +__log_set_lg_bsize(dbenv, lg_bsize) + DB_ENV *dbenv; + u_int32_t lg_bsize; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lg_bsize"); + + dbenv->lg_bsize = lg_bsize; + return (0); +} + +/* + * PUBLIC: int __log_get_lg_filemode __P((DB_ENV *, int *)); + */ +int +__log_get_lg_filemode(dbenv, lg_modep) + DB_ENV *dbenv; + int *lg_modep; +{ + DB_LOG *dblp; + DB_THREAD_INFO *ip; + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->get_lg_filemode", DB_INIT_LOG); + + if (LOGGING_ON(env)) { + dblp = env->lg_handle; + ENV_ENTER(env, ip); + LOG_SYSTEM_LOCK(env); + *lg_modep = ((LOG *)dblp->reginfo.primary)->filemode; + LOG_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + *lg_modep = dbenv->lg_filemode; + + return (0); +} + +/* + * __log_set_lg_filemode -- + * DB_ENV->set_lg_filemode. + * + * PUBLIC: int __log_set_lg_filemode __P((DB_ENV *, int)); + */ +int +__log_set_lg_filemode(dbenv, lg_mode) + DB_ENV *dbenv; + int lg_mode; +{ + DB_LOG *dblp; + DB_THREAD_INFO *ip; + ENV *env; + LOG *lp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->set_lg_filemode", DB_INIT_LOG); + + if (LOGGING_ON(env)) { + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ENV_ENTER(env, ip); + LOG_SYSTEM_LOCK(env); + lp->filemode = lg_mode; + LOG_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + dbenv->lg_filemode = lg_mode; + + return (0); +} + +/* + * PUBLIC: int __log_get_lg_max __P((DB_ENV *, u_int32_t *)); + */ +int +__log_get_lg_max(dbenv, lg_maxp) + DB_ENV *dbenv; + u_int32_t *lg_maxp; +{ + DB_LOG *dblp; + DB_THREAD_INFO *ip; + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->get_lg_max", DB_INIT_LOG); + + if (LOGGING_ON(env)) { + dblp = env->lg_handle; + ENV_ENTER(env, ip); + LOG_SYSTEM_LOCK(env); + *lg_maxp = ((LOG *)dblp->reginfo.primary)->log_nsize; + 
LOG_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + *lg_maxp = dbenv->lg_size; + + return (0); +} + +/* + * __log_set_lg_max -- + * DB_ENV->set_lg_max. + * + * PUBLIC: int __log_set_lg_max __P((DB_ENV *, u_int32_t)); + */ +int +__log_set_lg_max(dbenv, lg_max) + DB_ENV *dbenv; + u_int32_t lg_max; +{ + DB_LOG *dblp; + DB_THREAD_INFO *ip; + ENV *env; + LOG *lp; + int ret; + + env = dbenv->env; + ret = 0; + + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->set_lg_max", DB_INIT_LOG); + + if (LOGGING_ON(env)) { + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ENV_ENTER(env, ip); + if ((ret = __log_check_sizes(env, lg_max, 0)) == 0) { + LOG_SYSTEM_LOCK(env); + lp->log_nsize = lg_max; + LOG_SYSTEM_UNLOCK(env); + } + ENV_LEAVE(env, ip); + } else + dbenv->lg_size = lg_max; + + return (ret); +} + +/* + * PUBLIC: int __log_get_lg_regionmax __P((DB_ENV *, u_int32_t *)); + */ +int +__log_get_lg_regionmax(dbenv, lg_regionmaxp) + DB_ENV *dbenv; + u_int32_t *lg_regionmaxp; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->get_lg_regionmax", DB_INIT_LOG); + + if (LOGGING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lg_regionmaxp = + ((LOG *)env->lg_handle->reginfo.primary)->regionmax; + } else + *lg_regionmaxp = dbenv->lg_regionmax; + return (0); +} + +/* + * __log_set_lg_regionmax -- + * DB_ENV->set_lg_regionmax. + * + * PUBLIC: int __log_set_lg_regionmax __P((DB_ENV *, u_int32_t)); + */ +int +__log_set_lg_regionmax(dbenv, lg_regionmax) + DB_ENV *dbenv; + u_int32_t lg_regionmax; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lg_regionmax"); + + /* Let's not be silly. 
*/ + if (lg_regionmax != 0 && lg_regionmax < LG_BASE_REGION_SIZE) { + __db_errx(env, DB_STR_A("2569", + "log region size must be >= %d", + "%d"), LG_BASE_REGION_SIZE); + return (EINVAL); + } + + dbenv->lg_regionmax = lg_regionmax; + return (0); +} + +/* + * PUBLIC: int __log_get_lg_dir __P((DB_ENV *, const char **)); + */ +int +__log_get_lg_dir(dbenv, dirp) + DB_ENV *dbenv; + const char **dirp; +{ + *dirp = dbenv->db_log_dir; + return (0); +} + +/* + * __log_set_lg_dir -- + * DB_ENV->set_lg_dir. + * + * PUBLIC: int __log_set_lg_dir __P((DB_ENV *, const char *)); + */ +int +__log_set_lg_dir(dbenv, dir) + DB_ENV *dbenv; + const char *dir; +{ + ENV *env; + + env = dbenv->env; + + if (dbenv->db_log_dir != NULL) + __os_free(env, dbenv->db_log_dir); + return (__os_strdup(env, dir, &dbenv->db_log_dir)); +} + +/* + * __log_get_flags -- + * DB_ENV->get_flags. + * + * PUBLIC: void __log_get_flags __P((DB_ENV *, u_int32_t *)); + */ +void +__log_get_flags(dbenv, flagsp) + DB_ENV *dbenv; + u_int32_t *flagsp; +{ + DB_LOG *dblp; + ENV *env; + LOG *lp; + u_int32_t flags; + + env = dbenv->env; + + if ((dblp = env->lg_handle) == NULL) + return; + + lp = dblp->reginfo.primary; + + flags = *flagsp; + if (lp->db_log_autoremove) + LF_SET(DB_LOG_AUTO_REMOVE); + else + LF_CLR(DB_LOG_AUTO_REMOVE); + if (lp->db_log_inmemory) + LF_SET(DB_LOG_IN_MEMORY); + else + LF_CLR(DB_LOG_IN_MEMORY); + *flagsp = flags; +} + +/* + * __log_set_flags -- + * DB_ENV->set_flags. + * + * PUBLIC: void __log_set_flags __P((ENV *, u_int32_t, int)); + */ +void +__log_set_flags(env, flags, on) + ENV *env; + u_int32_t flags; + int on; +{ + DB_LOG *dblp; + LOG *lp; + + if ((dblp = env->lg_handle) == NULL) + return; + + lp = dblp->reginfo.primary; + + if (LF_ISSET(DB_LOG_AUTO_REMOVE)) + lp->db_log_autoremove = on ? 1 : 0; + if (LF_ISSET(DB_LOG_IN_MEMORY)) + lp->db_log_inmemory = on ? 1 : 0; +} + +/* + * List of flags we can handle here. 
DB_LOG_INMEMORY must be + * processed before creating the region, leave it out for now. + */ +#undef OK_FLAGS +#define OK_FLAGS \ + (DB_LOG_AUTO_REMOVE | DB_LOG_DIRECT | \ + DB_LOG_DSYNC | DB_LOG_IN_MEMORY | DB_LOG_ZERO) +static const FLAG_MAP LogMap[] = { + { DB_LOG_AUTO_REMOVE, DBLOG_AUTOREMOVE}, + { DB_LOG_DIRECT, DBLOG_DIRECT}, + { DB_LOG_DSYNC, DBLOG_DSYNC}, + { DB_LOG_IN_MEMORY, DBLOG_INMEMORY}, + { DB_LOG_ZERO, DBLOG_ZERO} +}; +/* + * __log_get_config -- + * Configure the logging subsystem. + * + * PUBLIC: int __log_get_config __P((DB_ENV *, u_int32_t, int *)); + */ +int +__log_get_config(dbenv, which, onp) + DB_ENV *dbenv; + u_int32_t which; + int *onp; +{ + ENV *env; + DB_LOG *dblp; + u_int32_t flags; + + env = dbenv->env; + if (FLD_ISSET(which, ~OK_FLAGS)) + return (__db_ferr(env, "DB_ENV->log_get_config", 0)); + dblp = env->lg_handle; + ENV_REQUIRES_CONFIG(env, dblp, "DB_ENV->log_get_config", DB_INIT_LOG); + + __env_fetch_flags(LogMap, sizeof(LogMap), &dblp->flags, &flags); + __log_get_flags(dbenv, &flags); + if (LF_ISSET(which)) + *onp = 1; + else + *onp = 0; + + return (0); +} + +/* + * __log_set_config -- + * Configure the logging subsystem. + * + * PUBLIC: int __log_set_config __P((DB_ENV *, u_int32_t, int)); + */ +int +__log_set_config(dbenv, flags, on) + DB_ENV *dbenv; + u_int32_t flags; + int on; +{ + return (__log_set_config_int(dbenv, flags, on, 0)); +} +/* + * __log_set_config_int -- + * Configure the logging subsystem. 
+ * + * PUBLIC: int __log_set_config_int __P((DB_ENV *, u_int32_t, int, int)); + */ +int +__log_set_config_int(dbenv, flags, on, in_open) + DB_ENV *dbenv; + u_int32_t flags; + int on; + int in_open; +{ + ENV *env; + DB_LOG *dblp; + u_int32_t mapped_flags; + + env = dbenv->env; + dblp = env->lg_handle; + if (FLD_ISSET(flags, ~OK_FLAGS)) + return (__db_ferr(env, "DB_ENV->log_set_config", 0)); + ENV_NOT_CONFIGURED(env, dblp, "DB_ENV->log_set_config", DB_INIT_LOG); + if (LF_ISSET(DB_LOG_DIRECT) && __os_support_direct_io() == 0) { + __db_errx(env, +"DB_ENV->log_set_config: direct I/O either not configured or not supported"); + return (EINVAL); + } + + if (LOGGING_ON(env)) { + if (!in_open && LF_ISSET(DB_LOG_IN_MEMORY) && + ((LOG *)dblp->reginfo.primary)->db_log_inmemory == 0) + ENV_ILLEGAL_AFTER_OPEN(env, + "DB_ENV->log_set_config: DB_LOG_IN_MEMORY"); + __log_set_flags(env, flags, on); + mapped_flags = 0; + __env_map_flags(LogMap, sizeof(LogMap), &flags, &mapped_flags); + if (on) + F_SET(dblp, mapped_flags); + else + F_CLR(dblp, mapped_flags); + } else { + /* + * DB_LOG_IN_MEMORY, DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC + * are mutually incompatible. If we're setting one of them, + * clear all current settings. + */ + if (on && LF_ISSET(DB_LOG_IN_MEMORY)) + F_CLR(dbenv, + DB_ENV_TXN_NOSYNC | DB_ENV_TXN_WRITE_NOSYNC); + + if (on) + FLD_SET(dbenv->lg_flags, flags); + else + FLD_CLR(dbenv->lg_flags, flags); + } + + return (0); +} + +/* + * __log_check_sizes -- + * Makes sure that the log file size and log buffer size are compatible. 
+ * + * PUBLIC: int __log_check_sizes __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__log_check_sizes(env, lg_max, lg_bsize) + ENV *env; + u_int32_t lg_max; + u_int32_t lg_bsize; +{ + DB_ENV *dbenv; + LOG *lp; + int inmem; + + dbenv = env->dbenv; + + if (LOGGING_ON(env)) { + lp = env->lg_handle->reginfo.primary; + inmem = lp->db_log_inmemory; + lg_bsize = lp->buffer_size; + } else + inmem = (FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) != 0); + + if (inmem) { + if (lg_bsize == 0) + lg_bsize = LG_BSIZE_INMEM; + if (lg_max == 0) + lg_max = LG_MAX_INMEM; + + if (lg_bsize <= lg_max) { + __db_errx(env, + "in-memory log buffer must be larger than the log file size"); + return (EINVAL); + } + } + + return (0); +} diff --git a/src/log/log_print.c b/src/log/log_print.c new file mode 100644 index 00000000..d2cda519 --- /dev/null +++ b/src/log/log_print.c @@ -0,0 +1,380 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" + +static int __log_print_dbregister __P((ENV *, DBT *, DB_LOG *)); + +/* + * PUBLIC: int __log_print_record __P((ENV *, + * PUBLIC: DBT *, DB_LSN *, char *, DB_LOG_RECSPEC *, void *)); + */ +int +__log_print_record(env, recbuf, lsnp, name, spec, info) + ENV *env; + DBT *recbuf; + DB_LSN *lsnp; + char *name; + DB_LOG_RECSPEC *spec; + void *info; +{ + DB *dbp; + DBT dbt; + DB_LOG_RECSPEC *sp, *np; + DB_LOG *dblp; + DB_LSN prev_lsn; + DB_MSGBUF msgbuf; + LOG *lp; + PAGE *hdrstart, *hdrtmp; + int32_t inttmp; + u_int32_t hdrsize, op, uinttmp; + u_int32_t type, txnid; + u_int8_t *bp, *datatmp; + int has_data, ret, downrev; + struct tm *lt; + time_t timeval; + char time_buf[CTIME_BUFLEN], *s; + const char *hdrname; + + COMPQUIET(hdrstart, NULL); + COMPQUIET(hdrname, NULL); + COMPQUIET(hdrsize, 0); + COMPQUIET(has_data, 0); + COMPQUIET(op, 0); + + bp = recbuf->data; + dblp = info; + dbp = NULL; + lp = env->lg_handle->reginfo.primary; + downrev = lp->persist.version < DB_LOGVERSION_50; + DB_MSGBUF_INIT(&msgbuf); + + /* + * The first three fields are always the same in every arg + * struct so we know their offsets. + */ + /* type */ + LOGCOPY_32(env, &type, bp); + bp += sizeof(u_int32_t); + + /* txnp */ + LOGCOPY_32(env, &txnid, bp); + bp += sizeof(txnid); + + /* Previous LSN */ + LOGCOPY_TOLSN(env,&prev_lsn, bp); + bp += sizeof(DB_LSN); + __db_msgadd(env, &msgbuf, + "[%lu][%lu]%s%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, + name, (type & DB_debug_FLAG) ? 
"_debug" : "", + (u_long)type, + (u_long)txnid, + (u_long)prev_lsn.file, (u_long)prev_lsn.offset); + + for (sp = spec; sp->type != LOGREC_Done; sp++) { + switch (sp->type) { + case LOGREC_OP: + LOGCOPY_32(env, &op, bp); + __db_msgadd(env, &msgbuf, "\t%s: ", sp->name); + __db_msgadd(env, &msgbuf, sp->fmt, OP_MODE_GET(op)); + __db_msgadd(env, &msgbuf, " ptype: %s\n", + __db_pagetype_to_string(OP_PAGE_GET(op))); + bp += sizeof(uinttmp); + break; + case LOGREC_DB: + LOGCOPY_32(env, &inttmp, bp); + __db_msgadd(env, &msgbuf, "\t%s: %lu\n", + sp->name, (unsigned long)inttmp); + bp += sizeof(inttmp); + if (dblp != NULL && inttmp < dblp->dbentry_cnt) + dbp = dblp->dbentry[inttmp].dbp; + break; + + case LOGREC_DBOP: + /* Special op for dbreg_register records. */ + if (dblp != NULL && (ret = + __log_print_dbregister(env, recbuf, dblp)) != 0) + return (ret); + LOGCOPY_32(env, &uinttmp, bp); + switch (FLD_ISSET(uinttmp, DBREG_OP_MASK)) { + case DBREG_CHKPNT: + s = "CHKPNT"; + break; + case DBREG_CLOSE: + s = "CLOSE"; + break; + case DBREG_OPEN: + s = "OPEN"; + break; + case DBREG_PREOPEN: + s = "PREOPEN"; + break; + case DBREG_RCLOSE: + s = "RCLOSE"; + break; + case DBREG_REOPEN: + s = "REOPEN"; + break; + case DBREG_XCHKPNT: + s = "XCHKPNT"; + break; + case DBREG_XOPEN: + s = "XOPEN"; + break; + case DBREG_XREOPEN: + s = "XREOPEN"; + break; + default: + s = "UNKNOWN"; + break; + } + __db_msgadd(env, &msgbuf, "\t%s: %s %lx\n", sp->name, + s, (unsigned long)(uinttmp & ~DBREG_OP_MASK)); + bp += sizeof(uinttmp); + break; + case LOGREC_ARG: + LOGCOPY_32(env, &uinttmp, bp); + __db_msgadd(env, &msgbuf, "\t%s: ", sp->name); + __db_msgadd(env, &msgbuf, sp->fmt, uinttmp); + __db_msgadd(env, &msgbuf, "\n"); + bp += sizeof(uinttmp); + break; + case LOGREC_TIME: + /* time_t is long but we only store 32 bits. 
*/ + LOGCOPY_32(env, &uinttmp, bp); + timeval = uinttmp; + lt = localtime(&timeval); + __db_msgadd(env, &msgbuf, + "\t%s: %ld (%.24s, 20%02lu%02lu%02lu%02lu%02lu.%02lu)\n", + sp->name, (long)timeval, + __os_ctime(&timeval, time_buf), + (u_long)lt->tm_year - 100, (u_long)lt->tm_mon+1, + (u_long)lt->tm_mday, (u_long)lt->tm_hour, + (u_long)lt->tm_min, (u_long)lt->tm_sec); + bp += sizeof(uinttmp); + break; + case LOGREC_PGDBT: + case LOGREC_PGDDBT: + case LOGREC_PGLIST: + case LOGREC_LOCKS: + case LOGREC_HDR: + case LOGREC_DATA: + case LOGREC_DBT: + LOGCOPY_32(env, &uinttmp, bp); + bp += sizeof(u_int32_t); + switch (sp->type) { + case LOGREC_HDR: + if (uinttmp == 0) + break; + has_data = 0; + for (np = sp + 1; np->type != LOGREC_Done; np++) + if (np->type == LOGREC_DATA) { + has_data = 1; + break; + } + + hdrstart = (PAGE*)bp; + hdrsize = uinttmp; + hdrname = sp->name; + if (has_data == 1) + break; + /* FALLTHROUGH */ + case LOGREC_DATA: + if (downrev ? LOG_SWAPPED(env) : + (dbp != NULL && F_ISSET(dbp, DB_AM_SWAP))) + __db_recordswap(op, hdrsize, hdrstart, + (has_data && uinttmp != 0) ? + bp : NULL, 1); + __db_msgadd(env, &msgbuf, "\t%s: ", hdrname); + __db_prbytes(env, &msgbuf, + (u_int8_t *)hdrstart, hdrsize); + if (has_data == 0 || uinttmp == 0) + break; + /* FALLTHROUGH */ + default: + __db_msgadd(env, &msgbuf, "\t%s: ", sp->name); + pr_data: + __db_prbytes(env, &msgbuf, bp, uinttmp); + has_data = 0; + break; + case LOGREC_PGDBT: + has_data = 0; + for (np = sp + 1; np->type != LOGREC_Done; np++) + if (np->type == LOGREC_PGDDBT) { + has_data = 1; + break; + } + + hdrstart = (PAGE*)bp; + hdrsize = uinttmp; + if (has_data == 1) + break; + /* FALLTHROUGH */ + case LOGREC_PGDDBT: + DB_ASSERT(env, hdrstart != NULL); + if (dbp != NULL && (downrev ? LOG_SWAPPED(env) : + F_ISSET(dbp, DB_AM_SWAP))) { + dbt.data = bp; + dbt.size = uinttmp; + if ((ret = __db_pageswap(env, dbp, + hdrstart, hdrsize, has_data == 0 ? 
+ NULL : &dbt, 1)) != 0) + return (ret); + } + if (downrev) + goto pr_data; + if (ALIGNP_INC(hdrstart, + sizeof(u_int32_t)) != hdrstart) { + if ((ret = __os_malloc(env, + hdrsize, &hdrtmp)) != 0) + return (ret); + memcpy(hdrtmp, hdrstart, hdrsize); + } else + hdrtmp = hdrstart; + if (has_data == 1 && ALIGNP_INC(bp, + sizeof(u_int32_t)) != bp) { + if ((ret = __os_malloc(env, + uinttmp, &datatmp)) != 0) + return (ret); + memcpy(datatmp, bp, uinttmp); + } else if (has_data == 1) + datatmp = bp; + else + datatmp = NULL; + if ((ret = __db_prpage_int(env, &msgbuf, + dbp, "\t", hdrtmp, + uinttmp, datatmp, DB_PR_PAGE)) != 0) + return (ret); + has_data = 0; + if (hdrtmp != hdrstart) + __os_free(env, hdrtmp); + if (datatmp != bp && datatmp != NULL) + __os_free(env, datatmp); + break; + case LOGREC_PGLIST: + dbt.data = bp; + dbt.size = uinttmp; + __db_pglist_print(env, &msgbuf, &dbt); + break; + case LOGREC_LOCKS: + dbt.data = bp; + dbt.size = uinttmp; + __lock_list_print(env, &msgbuf, &dbt); + break; + } + bp += uinttmp; + break; + + case LOGREC_POINTER: + LOGCOPY_TOLSN(env, &prev_lsn, bp); + __db_msgadd(env, &msgbuf, + "\t%s: [%lu][%lu]\n", sp->name, + (u_long)prev_lsn.file, (u_long)prev_lsn.offset); + bp += sizeof(DB_LSN); + break; + case LOGREC_Done: + DB_ASSERT(env, sp->type != LOGREC_Done); + } + } + if (msgbuf.buf != NULL) + DB_MSGBUF_FLUSH(env, &msgbuf); + else + __db_msg(env, "%s", ""); + return (0); +} + +/* + * __log_print_dbregister -- + * So that we can properly swap and print information from databases + * we generate dummy DB handles here. These are real handles that are never + * opened but their fileid, meta_pgno and some flags are set properly. + * This code uses parallel structures to those in the dbregister code. + * The DB_LOG handle passed in must NOT be the real environment handle + * since this would confuse actual running transactions if printing is + * done while the environment is active. 
+ */ +static int +__log_print_dbregister(env, recbuf, dblp) + ENV *env; + DBT *recbuf; + DB_LOG *dblp; +{ + __dbreg_register_args *argp; + DB *dbp; + DB_ENTRY *dbe; + int ret; + + if ((ret = __dbreg_register_read(env, recbuf->data, &argp)) != 0) + return (ret); + + if (dblp->dbentry_cnt <= argp->fileid && + (ret = __dbreg_add_dbentry(env, dblp, NULL, argp->fileid)) != 0) + goto err; + dbe = &dblp->dbentry[argp->fileid]; + dbp = dbe->dbp; + + switch (FLD_ISSET(argp->opcode, DBREG_OP_MASK)) { + case DBREG_CHKPNT: + case DBREG_OPEN: + case DBREG_REOPEN: + case DBREG_XCHKPNT: + case DBREG_XOPEN: + case DBREG_XREOPEN: + if (dbp != NULL) { + if (memcmp(dbp->fileid, + argp->uid.data, DB_FILE_ID_LEN) == 0 && + dbp->meta_pgno == argp->meta_pgno) + goto done; + if ((__db_close(dbp, NULL, DB_NOSYNC)) != 0) + goto err; + dbe->dbp = dbp = NULL; + } + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + memcpy(dbp->fileid, argp->uid.data, DB_FILE_ID_LEN); + dbp->meta_pgno = argp->meta_pgno; + F_SET(dbp, DB_AM_RECOVER); + /* + * We need to swap bytes if we are on a BIGEND machine XOR + * we have a BIGEND database. 
+ */ + if ((F_ISSET(env, ENV_LITTLEENDIAN) == 0) ^ + (FLD_ISSET(argp->opcode, DBREG_BIGEND) != 0)) + F_SET(dbp, DB_AM_SWAP); + if (FLD_ISSET(argp->opcode, DBREG_CHKSUM)) + F_SET(dbp, DB_AM_CHKSUM); + if (FLD_ISSET(argp->opcode, DBREG_ENCRYPT)) + F_SET(dbp, DB_AM_ENCRYPT); + if (FLD_ISSET(argp->opcode, DBREG_EXCL)) + F2_SET(dbp, DB2_AM_EXCL); + dbe->dbp = dbp; + break; + case DBREG_CLOSE: + case DBREG_RCLOSE: + if (dbp == NULL) + goto err; + if ((__db_close(dbp, NULL, DB_NOSYNC)) != 0) + goto err; + dbe->dbp = dbp = NULL; + break; + case DBREG_PREOPEN: + break; + default: + DB_ASSERT(env, argp->opcode != argp->opcode); + } +done: +err: + __os_free(env, argp); + return (ret); +} diff --git a/src/log/log_put.c b/src/log/log_put.c new file mode 100644 index 00000000..8f7e23d8 --- /dev/null +++ b/src/log/log_put.c @@ -0,0 +1,2041 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" +#include "dbinc/db_page.h" +#include "dbinc_auto/db_ext.h" + +static int __log_encrypt_record __P((ENV *, DBT *, HDR *, u_int32_t)); +static int __log_file __P((ENV *, const DB_LSN *, char *, size_t)); +static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t)); +static int __log_flush_commit __P((ENV *, const DB_LSN *, u_int32_t)); +static int __log_newfh __P((DB_LOG *, int)); +static int __log_put_next __P((ENV *, + DB_LSN *, const DBT *, HDR *, DB_LSN *)); +static int __log_put_record_int __P((ENV *, DB *, DB_TXN *, DB_LSN *, + u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, va_list)); +static int __log_putr __P((DB_LOG *, + DB_LSN *, const DBT *, u_int32_t, HDR *)); +static int __log_write __P((DB_LOG *, void *, u_int32_t)); + +/* + * __log_put_pp -- + * ENV->log_put pre/post processing. 
+ * + * PUBLIC: int __log_put_pp __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); + */ +int +__log_put_pp(dbenv, lsnp, udbt, flags) + DB_ENV *dbenv; + DB_LSN *lsnp; + const DBT *udbt; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_put", DB_INIT_LOG); + + /* Validate arguments: check for allowed flags. */ + if ((ret = __db_fchk(env, "DB_ENV->log_put", flags, + DB_LOG_CHKPNT | DB_LOG_COMMIT | + DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_WRNOSYNC)) != 0) + return (ret); + + /* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */ + if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH)) + return (__db_ferr(env, "DB_ENV->log_put", 1)); + + /* Replication clients should never write log records. */ + if (IS_REP_CLIENT(env)) { + __db_errx(env, DB_STR("2511", + "DB_ENV->log_put is illegal on replication clients")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_put(env, lsnp, udbt, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_put -- + * ENV->log_put. + * + * PUBLIC: int __log_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t)); + */ +int +__log_put(env, lsnp, udbt, flags) + ENV *env; + DB_LSN *lsnp; + const DBT *udbt; + u_int32_t flags; +{ + DBT *dbt, t; + DB_CIPHER *db_cipher; + DB_LOG *dblp; + DB_LSN lsn, old_lsn; + DB_REP *db_rep; + HDR hdr; + LOG *lp; + REP *rep; + int lock_held, need_free, ret; + u_int8_t *key; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + db_cipher = env->crypto_handle; + db_rep = env->rep_handle; + if (db_rep != NULL) + rep = db_rep->region; + else + rep = NULL; + + dbt = &t; + t = *udbt; + lock_held = need_free = 0; + ZERO_LSN(old_lsn); + hdr.len = hdr.prev = 0; + + /* + * In general, if we are not a rep application, but are sharing a master + * rep env, we should not be writing log records. 
However, we can allow + * a non-replication-aware process to join a pre-existing repmgr + * environment, if env handle meets repmgr's DB_THREAD requirement. + */ + + if (IS_REP_MASTER(env) && db_rep->send == NULL) { +#ifdef HAVE_REPLICATION_THREADS + if (F_ISSET(env, ENV_THREAD) && APP_IS_REPMGR(env)) { + if ((ret = __repmgr_autostart(env)) != 0) + return (ret); + } else +#endif + { +#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) + __db_errx(env, DB_STR("2512", + "Non-replication DB_ENV handle attempting " + "to modify a replicated environment")); + return (EINVAL); +#endif + } + } + DB_ASSERT(env, !IS_REP_CLIENT(env)); + + /* + * If we are coming from the logging code, we use an internal flag, + * DB_LOG_NOCOPY, because we know we can overwrite/encrypt the log + * record in place. Otherwise, if a user called log_put then we + * must copy it to new memory so that we know we can write it. + * + * We also must copy it to new memory if we are a replication master + * so that we retain an unencrypted copy of the log record to send + * to clients. + */ + if (!LF_ISSET(DB_LOG_NOCOPY) || IS_REP_MASTER(env)) { + if (CRYPTO_ON(env)) + t.size += db_cipher->adj_size(udbt->size); + if ((ret = __os_calloc(env, 1, t.size, &t.data)) != 0) + goto err; + need_free = 1; + memcpy(t.data, udbt->data, udbt->size); + } + if ((ret = __log_encrypt_record(env, dbt, &hdr, udbt->size)) != 0) + goto err; + if (CRYPTO_ON(env)) + key = db_cipher->mac_key; + else + key = NULL; +#ifdef HAVE_LOG_CHECKSUM + __db_chksum(&hdr, dbt->data, dbt->size, key, hdr.chksum); +#endif + + LOG_SYSTEM_LOCK(env); + lock_held = 1; + + if ((ret = __log_put_next(env, &lsn, dbt, &hdr, &old_lsn)) != 0) + goto panic_check; + + /* + * Assign the return LSN before dropping the region lock. Necessary + * in case the lsn is a begin_lsn from a TXN_DETAIL structure passed in + * by the logging routines. 
We use atomic 32-bit operations because + * during commit this will be a TXN_DETAIL visible_lsn field, and MVCC + * relies on reading the fields atomically. + */ + lsnp->file = lsn.file; + lsnp->offset = lsn.offset; + +#ifdef HAVE_REPLICATION + if (IS_REP_MASTER(env)) { + __rep_newfile_args nf_args; + DBT newfiledbt; + REP_BULK bulk; + size_t len; + u_int32_t ctlflags; + u_int8_t buf[__REP_NEWFILE_SIZE]; + + /* + * Replication masters need to drop the lock to send messages, + * but want to drop and reacquire it a minimal number of times. + */ + ctlflags = LF_ISSET(DB_LOG_COMMIT | DB_LOG_CHKPNT) ? + REPCTL_PERM : 0; + LOG_SYSTEM_UNLOCK(env); + lock_held = 0; + if (LF_ISSET(DB_FLUSH)) + ctlflags |= REPCTL_FLUSH; + + /* + * If we changed files and we're in a replicated environment, + * we need to inform our clients now that we've dropped the + * region lock. + * + * Note that a failed NEWFILE send is a dropped message that + * our client can handle, so we can ignore it. It's possible + * that the record we already put is a commit, so we don't just + * want to return failure. + */ + if (!IS_ZERO_LSN(old_lsn)) { + memset(&newfiledbt, 0, sizeof(newfiledbt)); + nf_args.version = lp->persist.version; + (void)__rep_newfile_marshal(env, &nf_args, + buf, __REP_NEWFILE_SIZE, &len); + DB_INIT_DBT(newfiledbt, buf, len); + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_NEWFILE, &old_lsn, &newfiledbt, 0, 0); + } + + /* + * If we're doing bulk processing put it in the bulk buffer. + */ + ret = 0; + if (FLD_ISSET(rep->config, REP_C_BULK)) { + /* + * Bulk could have been turned on by another process. + * If so, set the address into the bulk region now. 
+ */ + if (db_rep->bulk == NULL) + db_rep->bulk = R_ADDR(&dblp->reginfo, + lp->bulk_buf); + memset(&bulk, 0, sizeof(bulk)); + bulk.addr = db_rep->bulk; + bulk.offp = &lp->bulk_off; + bulk.len = lp->bulk_len; + bulk.lsn = lsn; + bulk.type = REP_BULK_LOG; + bulk.eid = DB_EID_BROADCAST; + bulk.flagsp = &lp->bulk_flags; + ret = __rep_bulk_message(env, &bulk, NULL, + &lsn, udbt, ctlflags); + } + if (!FLD_ISSET(rep->config, REP_C_BULK) || + ret == DB_REP_BULKOVF) { + /* + * Then send the log record itself on to our clients. + */ + /* + * !!! + * In the crypto case, we MUST send the udbt, not the + * now-encrypted dbt. Clients have no way to decrypt + * without the header. + */ + ret = __rep_send_message(env, DB_EID_BROADCAST, + REP_LOG, &lsn, udbt, ctlflags, 0); + } + if (FLD_ISSET(ctlflags, REPCTL_PERM)) { + LOG_SYSTEM_LOCK(env); +#ifdef HAVE_STATISTICS + if (IS_USING_LEASES(env)) + rep->stat.st_lease_sends++; +#endif + /* + * Keep track of our last PERM lsn. Set this on a + * master under the log lock. When using leases, if + * we set max_perm_lsn too early (before the send) + * then we hit a lot of false invalid lease checks + * which all try to refresh and hurt performance. + */ + if (LOG_COMPARE(&lp->max_perm_lsn, &lsn) < 0) + lp->max_perm_lsn = lsn; + LOG_SYSTEM_UNLOCK(env); + } + /* + * If the send fails and we're a commit or checkpoint, + * there's nothing we can do; the record's in the log. + * Flush it, even if we're running with TXN_NOSYNC, + * on the grounds that it should be in durable + * form somewhere. + */ + if (ret != 0 && FLD_ISSET(ctlflags, REPCTL_PERM)) + LF_SET(DB_FLUSH); + /* + * We ignore send failures so reset 'ret' to 0 here. + * We needed to check special return values from + * bulk transfer and errors from either bulk or normal + * message sending need flushing on perm records. But + * otherwise we need to ignore it and reset it now. + */ + ret = 0; + } +#endif + + /* + * If needed, do a flush. 
Note that failures at this point + * are only permissible if we know we haven't written a commit + * record; __log_flush_commit is responsible for enforcing this. + * + * If a flush is not needed, see if WRITE_NOSYNC was set and we + * need to write out the log buffer. + */ + if (LF_ISSET(DB_FLUSH | DB_LOG_WRNOSYNC)) { + if (!lock_held) { + LOG_SYSTEM_LOCK(env); + lock_held = 1; + } + if ((ret = __log_flush_commit(env, &lsn, flags)) != 0) + goto panic_check; + } + + /* + * If flushed a checkpoint record, reset the "bytes since the last + * checkpoint" counters. + */ + if (LF_ISSET(DB_LOG_CHKPNT)) + lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; + + /* Increment count of records added to the log. */ + STAT(++lp->stat.st_record); + + if (0) { +panic_check: /* + * Writing log records cannot fail if we're a replication + * master. The reason is that once we send the record to + * replication clients, the transaction can no longer + * abort, otherwise the master would be out of sync with + * the rest of the replication group. Panic the system. + */ + if (ret != 0 && IS_REP_MASTER(env)) + ret = __env_panic(env, ret); + } + +err: if (lock_held) + LOG_SYSTEM_UNLOCK(env); + if (need_free) + __os_free(env, dbt->data); + + /* + * If auto-remove is set and we switched files, remove unnecessary + * log files. + */ + if (ret == 0 && !IS_ZERO_LSN(old_lsn) && lp->db_log_autoremove) + __log_autoremove(env); + + return (ret); +} + +/* + * __log_current_lsn_int -- + * internal operations of __log_current_lsn + * + * PUBLIC: int __log_current_lsn_int + * PUBLIC: __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *)); + */ +int +__log_current_lsn_int(env, lsnp, mbytesp, bytesp) + ENV *env; + DB_LSN *lsnp; + u_int32_t *mbytesp, *bytesp; +{ + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + + /* + * We need the LSN of the last entry in the log. 
+ * + * Typically, it's easy to get the last written LSN, you simply look + * at the current log pointer and back up the number of bytes of the + * last log record. However, if the last thing we did was write the + * log header of a new log file, then, this doesn't work, so we return + * the first log record that will be written in this new file. + */ + *lsnp = lp->lsn; + if (lp->lsn.offset > lp->len) + lsnp->offset -= lp->len; + + /* + * Since we're holding the log region lock, return the bytes put into + * the log since the last checkpoint, transaction checkpoint needs it. + * + * We add the current buffer offset so as to count bytes that have not + * yet been written, but are sitting in the log buffer. + */ + if (mbytesp != NULL) { + *mbytesp = lp->stat.st_wc_mbytes; + *bytesp = (u_int32_t)(lp->stat.st_wc_bytes + lp->b_off); + } + + LOG_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __log_current_lsn -- + * Return the current LSN. + * + * PUBLIC: int __log_current_lsn + * PUBLIC: __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *)); + */ +int +__log_current_lsn(env, lsnp, mbytesp, bytesp) + ENV *env; + DB_LSN *lsnp; + u_int32_t *mbytesp, *bytesp; +{ + DB_THREAD_INFO *ip; + int ret; + + ret = 0; + ENV_ENTER(env, ip); + ret = __log_current_lsn_int(env, lsnp, mbytesp, bytesp); + ENV_LEAVE(env, ip); + + return ret; +} + +/* + * __log_put_next -- + * Put the given record as the next in the log, wherever that may + * turn out to be. + */ +static int +__log_put_next(env, lsn, dbt, hdr, old_lsnp) + ENV *env; + DB_LSN *lsn; + const DBT *dbt; + HDR *hdr; + DB_LSN *old_lsnp; +{ + DB_LOG *dblp; + DB_LSN old_lsn; + LOG *lp; + int adv_file, newfile, ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* + * Save a copy of lp->lsn before we might decide to switch log + * files and change it. 
If we do switch log files, and we're + * doing replication, we'll need to tell our clients about the + * switch, and they need to receive a NEWFILE message + * with this "would-be" LSN in order to know they're not + * missing any log records. + */ + old_lsn = lp->lsn; + newfile = 0; + adv_file = 0; + /* + * If our current log is at an older version and we want to write + * a record then we need to advance the log. + */ + if (lp->persist.version != DB_LOGVERSION) { + __log_set_version(env, DB_LOGVERSION); + adv_file = 1; + } + + /* + * If this information won't fit in the file, or if we're a + * replication client environment and have been told to do so, + * swap files. + */ + if (adv_file || lp->lsn.offset == 0 || + lp->lsn.offset + hdr->size + dbt->size > lp->log_size) { + if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_size) { + __db_errx(env, DB_STR_A("2513", + "DB_ENV->log_put: record larger than maximum file size (%lu > %lu)", + "%lu %lu"), + (u_long)hdr->size + sizeof(LOGP) + dbt->size, + (u_long)lp->log_size); + return (EINVAL); + } + + if ((ret = __log_newfile(dblp, NULL, 0, 0)) != 0) + return (ret); + + /* + * Flag that we switched files, in case we're a master + * and need to send this information to our clients. + * We postpone doing the actual send until we can + * safely release the log region lock and are doing so + * anyway. + */ + newfile = 1; + } + + /* If we switched log files, let our caller know where. */ + if (newfile) + *old_lsnp = old_lsn; + + /* Actually put the record. */ + return (__log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len, hdr)); +} + +/* + * __log_flush_commit -- + * Flush a record. 
+ */ +static int +__log_flush_commit(env, lsnp, flags) + ENV *env; + const DB_LSN *lsnp; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LSN flush_lsn; + HDR hdr; + LOG *lp; + int ret, t_ret; + size_t nr, nw; + u_int8_t *buffer; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + flush_lsn = *lsnp; + + ret = 0; + + /* + * DB_FLUSH: + * Flush a record for which the DB_FLUSH flag to log_put was set. + * + * DB_LOG_WRNOSYNC: + * If there's anything in the current log buffer, write it out. + * (Skipped for in-memory logs and when the buffer is empty.) + */ + if (LF_ISSET(DB_FLUSH)) + ret = __log_flush_int(dblp, &flush_lsn, 1); + else if (!lp->db_log_inmemory && lp->b_off != 0) + if ((ret = __log_write(dblp, + dblp->bufp, (u_int32_t)lp->b_off)) == 0) + lp->b_off = 0; + + /* + * If a flush supporting a transaction commit fails, we must abort the + * transaction. (If we aren't doing a commit, return the failure; if + * the commit we care about made it to disk successfully, we just + * ignore the failure, because there's no way to undo the commit.) + */ + if (ret == 0 || !LF_ISSET(DB_LOG_COMMIT)) + return (ret); + + if (LF_ISSET(DB_FLUSH) ? + flush_lsn.file != lp->s_lsn.file || + flush_lsn.offset < lp->s_lsn.offset : + flush_lsn.file != lp->lsn.file || flush_lsn.offset < lp->w_off) + return (0); /* record is in another file or below the sync point (DB_FLUSH) / write offset (otherwise): ignore the failure */ + + if (IS_REP_MASTER(env)) { + __db_err(env, ret, DB_STR("2514", + "Write failed on MASTER commit.")); + return (__env_panic(env, ret)); + } + + /* + * Else, make sure that the commit record does not get out after we + * abort the transaction. Do this by overwriting the commit record + * in the buffer. (Note that other commits in this buffer will wait + * until a successful write happens, we do not wake them.) We point + * at the right part of the buffer and write an abort record over the + * commit. We must then try and flush the buffer again, since the + * interesting part of the buffer may have actually made it out to + * disk before there was a failure, we can't know for sure. 
+ */ + if (flush_lsn.offset > lp->w_off) { + if ((t_ret = __txn_force_abort(env, + dblp->bufp + flush_lsn.offset - lp->w_off)) != 0) + return (__env_panic(env, t_ret)); + } else { + /* + * The buffer was written, but it is not on disk, so we + * must read it back and force things from a commit + * state to an abort state. Lots of things could fail + * here and we will be left with a commit record but + * a panic return. + * + * The header is read first to learn the record length + * (hdr.len); the whole record is then read, rewritten + * by __txn_force_abort and written back at the same + * offset. A short read or write is reported as EIO. + */ + if ( + (t_ret = __os_seek(env, + dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 || + (t_ret = __os_read(env, dblp->lfhp, &hdr, + HDR_NORMAL_SZ, &nr)) != 0 || nr != HDR_NORMAL_SZ) + return (__env_panic(env, t_ret == 0 ? EIO : t_ret)); + if (LOG_SWAPPED(env)) + __log_hdrswap(&hdr, CRYPTO_ON(env)); + if ((t_ret = __os_malloc(env, hdr.len, &buffer)) != 0 || + (t_ret = __os_seek(env, + dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 || + (t_ret = __os_read(env, dblp->lfhp, buffer, + hdr.len, &nr)) != 0 || nr != hdr.len || + (t_ret = __txn_force_abort(env, buffer)) != 0 || + (t_ret = __os_seek(env, + dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 || + (t_ret = __os_write(env, dblp->lfhp, buffer, + nr, &nw)) != 0 || nw != nr) + return (__env_panic(env, t_ret == 0 ? EIO : t_ret)); + __os_free(env, buffer); + } + /* + * Try to flush the log again, if the disk just bounced then we + * want to be sure it does not go away again before we write the + * abort record. + */ + (void)__log_flush_int(dblp, &flush_lsn, 0); + + return (ret); /* still the error from the original flush/write */ +} + +/* + * __log_newfile -- + * Initialize and switch to a new log file. (Note that this is + * called both when no log yet exists and when we fill a log file.) 
+ * + * PUBLIC: int __log_newfile __P((DB_LOG *, DB_LSN *, u_int32_t, u_int32_t)); + */ +int +__log_newfile(dblp, lsnp, logfile, version) + DB_LOG *dblp; + DB_LSN *lsnp; + u_int32_t logfile; + u_int32_t version; +{ + DBT t; + DB_CIPHER *db_cipher; + DB_LSN lsn; + ENV *env; + HDR hdr; + LOG *lp; + LOGP *tpersist; + int need_free, ret; + u_int32_t lastoff; + size_t tsize; + + env = dblp->env; + lp = dblp->reginfo.primary; + + /* + * If we're not specifying a specific log file number and we're + * not at the beginning of a file already, start a new one. + */ + if (logfile == 0 && lp->lsn.offset != 0) { + /* + * Flush the log so this file is out and can be closed. We + * cannot release the region lock here because we need to + * protect the end of the file while we switch. In + * particular, a thread with a smaller record than ours + * could detect that there is space in the log. Even + * blocking that event by declaring the file full would + * require all threads to wait here so that the lsn.file + * can be moved ahead after the flush completes. This + * probably could be changed if we had an lsn for the + * previous file and one for the current, but it does not + * seem like this would get much more throughput, if any. + */ + if ((ret = __log_flush_int(dblp, NULL, 0)) != 0) + return (ret); + + /* + * Save the last known offset from the previous file, we'll + * need it to initialize the persistent header information. + */ + lastoff = lp->lsn.offset; + + /* Point the current LSN to the new file. */ + ++lp->lsn.file; + lp->lsn.offset = 0; + + /* Reset the file write offset. */ + lp->w_off = 0; + } else + lastoff = 0; + + /* + * Replication may require we reset the log file name space entirely. + * In that case we also force a file switch so that replication can + * clean up old files. 
+ */ + if (logfile != 0) { + lp->lsn.file = logfile; + lp->lsn.offset = 0; + lp->w_off = 0; + if (lp->db_log_inmemory) { + lsn = lp->lsn; + (void)__log_zero(env, &lsn); /* return value deliberately ignored */ + } else { + lp->s_lsn = lp->lsn; + if ((ret = __log_newfh(dblp, 1)) != 0) + return (ret); + } + } + + DB_ASSERT(env, lp->db_log_inmemory || lp->b_off == 0); + if (lp->db_log_inmemory && + (ret = __log_inmem_newfile(dblp, lp->lsn.file)) != 0) + return (ret); + + /* + * Insert persistent information as the first record in every file. + * Note that the previous length is wrong for the very first record + * of the log, but that's okay, we check for it during retrieval. + * + * The record is built in a temporary copy (tpersist) sized to leave + * room for the cipher's adjustment when encryption is on, so that + * lp->persist itself is never byte-swapped or encrypted. + */ + memset(&t, 0, sizeof(t)); + memset(&hdr, 0, sizeof(HDR)); + + need_free = 0; + tsize = sizeof(LOGP); + db_cipher = env->crypto_handle; + if (CRYPTO_ON(env)) + tsize += db_cipher->adj_size(tsize); + if ((ret = __os_calloc(env, 1, tsize, &tpersist)) != 0) + return (ret); + need_free = 1; /* tpersist is released at the err label below */ + /* + * If we're told what version to make this file, then we + * need to be at that version. Update here. + */ + if (version != 0) { + __log_set_version(env, version); + if ((ret = __env_init_rec(env, version)) != 0) + goto err; + } + lp->persist.log_size = lp->log_size = lp->log_nsize; /* the new file takes the size held in lp->log_nsize */ + memcpy(tpersist, &lp->persist, sizeof(LOGP)); + DB_SET_DBT(t, tpersist, tsize); + if (LOG_SWAPPED(env)) + __log_persistswap(tpersist); + + if ((ret = + __log_encrypt_record(env, &t, &hdr, (u_int32_t)tsize)) != 0) + goto err; + + if ((ret = __log_putr(dblp, &lsn, + &t, lastoff == 0 ? 0 : lastoff - lp->len, &hdr)) != 0) + goto err; + + /* Update the LSN information returned to the caller. */ + if (lsnp != NULL) + *lsnp = lp->lsn; + +err: if (need_free) + __os_free(env, tpersist); + return (ret); +} + +/* + * __log_putr -- + * Actually put a record into the log. 
+ */ +static int +__log_putr(dblp, lsn, dbt, prev, h) + DB_LOG *dblp; + DB_LSN *lsn; + const DBT *dbt; + u_int32_t prev; + HDR *h; +{ + DB_CIPHER *db_cipher; + DB_LSN f_lsn; + ENV *env; + HDR tmp, *hdr; + LOG *lp; + int ret, t_ret; + db_size_t b_off; + size_t nr; + u_int32_t w_off; + + env = dblp->env; + lp = dblp->reginfo.primary; + + /* + * If we weren't given a header, use a local one, zeroed and with + * its size chosen according to whether encryption is on. + */ + db_cipher = env->crypto_handle; + if (h == NULL) { + hdr = &tmp; + memset(hdr, 0, sizeof(HDR)); + if (CRYPTO_ON(env)) + hdr->size = HDR_CRYPTO_SZ; + else + hdr->size = HDR_NORMAL_SZ; + } else + hdr = h; + + /* + * Save our position in case we fail; the err path below restores + * these three fields. + */ + b_off = lp->b_off; + w_off = lp->w_off; + f_lsn = lp->f_lsn; + + /* + * Initialize the header. If we just switched files, lsn.offset will + * be 0, and what we really want is the offset of the previous record + * in the previous file. Fortunately, prev holds the value we want. + */ + hdr->prev = prev; + hdr->len = (u_int32_t)hdr->size + dbt->size; + +#ifdef HAVE_LOG_CHECKSUM + /* + * If we were passed in a nonzero checksum, our caller calculated + * the checksum before acquiring the log mutex, as an optimization. + * + * If our caller calculated a real checksum of 0, we'll needlessly + * recalculate it. C'est la vie; there's no out-of-bounds value + * here. + */ + if (hdr->chksum[0] == 0) { + if (lp->persist.version < DB_LOGCHKSUM) + __db_chksum(NULL, dbt->data, dbt->size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, + hdr->chksum); + else + __db_chksum(hdr, dbt->data, dbt->size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, + hdr->chksum); + } else if (lp->persist.version >= DB_LOGCHKSUM) + /* + * We need to include hdr->prev and len here, since they were + * still zero at the time of the caller's __db_chksum() call. 
+ */ + LOG_HDR_SUM(CRYPTO_ON(env), hdr, hdr->chksum); +#endif + + if (lp->db_log_inmemory && (ret = __log_inmem_chkspace(dblp, + (u_int32_t)hdr->size + dbt->size)) != 0) + goto err; + + /* + * The offset into the log file at this point is the LSN where + * we're about to put this record, and is the LSN the caller wants. + */ + *lsn = lp->lsn; + + nr = hdr->size; + if (LOG_SWAPPED(env)) + __log_hdrswap(hdr, CRYPTO_ON(env)); + + /* nr can't overflow a 32 bit value - header size is internal. */ + ret = __log_fill(dblp, lsn, hdr, (u_int32_t)nr); + + if (LOG_SWAPPED(env)) + __log_hdrswap(hdr, CRYPTO_ON(env)); /* swap back so the caller's header is as it was passed in */ + + if (ret != 0) + goto err; + + if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0) + goto err; + + lp->len = (u_int32_t)(hdr->size + dbt->size); /* length of the record just added */ + lp->lsn.offset += lp->len; + return (0); +err: + /* + * If we wrote more than one buffer before failing, get the + * first one back. The extra buffers will fail the checksums + * and be ignored. + */ + if (w_off + lp->buffer_size < lp->w_off) { + DB_ASSERT(env, !lp->db_log_inmemory); + if ((t_ret = __os_seek(env, dblp->lfhp, 0, 0, w_off)) != 0 || + (t_ret = __os_read(env, dblp->lfhp, dblp->bufp, + b_off, &nr)) != 0) + return (__env_panic(env, t_ret)); + if (nr != b_off) { + __db_errx(env, DB_STR("2515", + "Short read while restoring log")); + return (__env_panic(env, EIO)); + } + } + + /* Reset to where we started. */ + lp->w_off = w_off; + lp->b_off = b_off; + lp->f_lsn = f_lsn; + + return (ret); +} + +/* + * __log_flush_pp -- + * ENV->log_flush pre/post processing. 
+ * + * PUBLIC: int __log_flush_pp __P((DB_ENV *, const DB_LSN *)); + */ +int +__log_flush_pp(dbenv, lsn) + DB_ENV *dbenv; + const DB_LSN *lsn; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_flush", DB_INIT_LOG); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_flush(env, lsn)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * See if we need to wait. s_lsn is not locked so some care is needed. + * The sync point can only move forward. The lsnp->file cannot be + * greater than the s_lsn.file. If the file we want is in the past + * we are done. If the file numbers are the same check the offset. + * This all assumes we can read a 32-bit quantity in one state or + * the other, not in transition. + * + * Evaluates to non-zero when lsnp lies strictly before (lp)->s_lsn. + * Both arguments are evaluated more than once. + */ +#define ALREADY_FLUSHED(lp, lsnp) \ + (((lp)->s_lsn.file > (lsnp)->file) || \ + ((lp)->s_lsn.file == (lsnp)->file && \ + (lp)->s_lsn.offset > (lsnp)->offset)) + +/* + * __log_flush -- + * ENV->log_flush + * + * A NULL lsn is passed straight through to __log_flush_int. For a + * non-NULL lsn the unlocked ALREADY_FLUSHED test is tried first, so + * the log region lock is only taken when a flush may be needed. + * + * PUBLIC: int __log_flush __P((ENV *, const DB_LSN *)); + */ +int +__log_flush(env, lsn) + ENV *env; + const DB_LSN *lsn; +{ + DB_LOG *dblp; + LOG *lp; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + if (lsn != NULL && ALREADY_FLUSHED(lp, lsn)) + return (0); + LOG_SYSTEM_LOCK(env); + ret = __log_flush_int(dblp, lsn, 1); + LOG_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __log_flush_int -- + * Write all records less than or equal to the specified LSN; internal + * version. 
+ * + * PUBLIC: int __log_flush_int __P((DB_LOG *, const DB_LSN *, int)); + */ +int +__log_flush_int(dblp, lsnp, release) + DB_LOG *dblp; + const DB_LSN *lsnp; + int release; +{ + struct __db_commit *commit; + ENV *env; + DB_LSN flush_lsn, f_lsn; + LOG *lp; + size_t b_off; + u_int32_t ncommit, w_off; + int do_flush, first, ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + ncommit = 0; + ret = 0; + + /* In-memory logs have nothing to sync: just advance the sync LSN. */ + if (lp->db_log_inmemory) { + lp->s_lsn = lp->lsn; + STAT(++lp->stat.st_scount); + return (0); + } + + /* + * If no LSN specified, flush the entire log by setting the flush LSN + * to the last LSN written in the log. Otherwise, check that the LSN + * isn't a non-existent record for the log. + */ + if (lsnp == NULL) { + flush_lsn.file = lp->lsn.file; + flush_lsn.offset = lp->lsn.offset - lp->len; + } else if (lsnp->file > lp->lsn.file || + (lsnp->file == lp->lsn.file && + lsnp->offset > lp->lsn.offset - lp->len)) { + __db_errx(env, DB_STR_A("2516", + "DB_ENV->log_flush: LSN of %lu/%lu past current end-of-log of %lu/%lu", + "%lu %lu %lu %lu"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)lp->lsn.file, + (u_long)lp->lsn.offset); + __db_errx(env, DB_STR("2517", + "Database environment corrupt; the wrong log files may " + "have been removed or incompatible database files " + "imported from another environment")); + return (__env_panic(env, DB_RUNRECOVERY)); + } else { + if (ALREADY_FLUSHED(lp, lsnp)) + return (0); + flush_lsn = *lsnp; + } + + /* + * If a flush is in progress and we're allowed to do so, drop + * the region lock and block waiting for the next flush. 
+ * + * A waiter structure is taken from lp->free_commits, or allocated + * (with its mutex locked once up front) when that list is empty. + * If the allocation itself fails we fall through to flush directly. + */ + if (release && lp->in_flush != 0) { + if ((commit = SH_TAILQ_FIRST( + &lp->free_commits, __db_commit)) == NULL) { + if ((ret = __env_alloc(&dblp->reginfo, + sizeof(struct __db_commit), &commit)) != 0) + goto flush; + memset(commit, 0, sizeof(*commit)); + if ((ret = __mutex_alloc(env, MTX_TXN_COMMIT, + DB_MUTEX_SELF_BLOCK, &commit->mtx_txnwait)) != 0) { + __env_alloc_free(&dblp->reginfo, commit); + return (ret); + } + MUTEX_LOCK(env, commit->mtx_txnwait); + } else + SH_TAILQ_REMOVE( + &lp->free_commits, commit, links, __db_commit); + + lp->ncommit++; + + /* + * Flushes may be requested out of LSN order; be + * sure we only move lp->t_lsn forward. + */ + if (LOG_COMPARE(&lp->t_lsn, &flush_lsn) < 0) + lp->t_lsn = flush_lsn; + + commit->lsn = flush_lsn; + SH_TAILQ_INSERT_HEAD( + &lp->commits, commit, links, __db_commit); + LOG_SYSTEM_UNLOCK(env); + /* Wait here for the in-progress flush to finish. */ + MUTEX_LOCK(env, commit->mtx_txnwait); + LOG_SYSTEM_LOCK(env); + + lp->ncommit--; + /* + * Grab the flag before freeing the struct to see if + * we need to flush the log to commit. If so, + * use the maximal lsn for any committing thread. + */ + do_flush = F_ISSET(commit, DB_COMMIT_FLUSH); + F_CLR(commit, DB_COMMIT_FLUSH); + SH_TAILQ_INSERT_HEAD( + &lp->free_commits, commit, links, __db_commit); + if (do_flush) { + lp->in_flush--; + flush_lsn = lp->t_lsn; + } else + return (0); + } + + /* + * Protect flushing with its own mutex so we can release + * the region lock except during file switches. + */ +flush: MUTEX_LOCK(env, lp->mtx_flush); + + /* + * If the LSN is less than or equal to the last-sync'd LSN, we're done. + * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte + * after the byte we absolutely know was written to disk, so the test + * is <, not <=. 
+ */ + if (flush_lsn.file < lp->s_lsn.file || + (flush_lsn.file == lp->s_lsn.file && + flush_lsn.offset < lp->s_lsn.offset)) { + MUTEX_UNLOCK(env, lp->mtx_flush); + goto done; + } + + /* + * We may need to write the current buffer. We have to write the + * current buffer if the flush LSN is greater than or equal to the + * buffer's starting LSN. + * + * Otherwise, it's still possible that this thread may never have + * written to this log file. Acquire a file descriptor if we don't + * already have one. + */ + if (lp->b_off != 0 && LOG_COMPARE(&flush_lsn, &lp->f_lsn) >= 0) { + if ((ret = __log_write(dblp, + dblp->bufp, (u_int32_t)lp->b_off)) != 0) { + MUTEX_UNLOCK(env, lp->mtx_flush); + goto done; + } + + lp->b_off = 0; + } else if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file) + if ((ret = __log_newfh(dblp, 0)) != 0) { + MUTEX_UNLOCK(env, lp->mtx_flush); + goto done; + } + + /* + * We are going to flush, release the region. + * First get the current state of the buffer since + * another write may come in, but we may not flush it. + */ + b_off = lp->b_off; + w_off = lp->w_off; + f_lsn = lp->f_lsn; + lp->in_flush++; + if (release) + LOG_SYSTEM_UNLOCK(env); + + /* Sync all writes to disk. */ + if ((ret = __os_fsync(env, dblp->lfhp)) != 0) { + MUTEX_UNLOCK(env, lp->mtx_flush); + if (release) + LOG_SYSTEM_LOCK(env); + lp->in_flush--; + goto done; + } + + /* + * Set the last-synced LSN. + * This value must be set to the LSN past the last complete + * record that has been flushed. This is at least the first + * lsn, f_lsn. If the buffer is empty, b_off == 0, then + * we can move up to write point since the first lsn is not + * set for the new buffer. + */ + lp->s_lsn = f_lsn; + if (b_off == 0) + lp->s_lsn.offset = w_off; + + MUTEX_UNLOCK(env, lp->mtx_flush); + if (release) + LOG_SYSTEM_LOCK(env); + + lp->in_flush--; + STAT(++lp->stat.st_scount); + + /* + * How many flush calls (usually commits) did this call actually sync? + * At least one, if it got here. 
+ */ + ncommit = 1; +done: + /* + * Wake waiting threads: every waiter whose LSN is now below the + * sync point is released, and the first waiter that is not is + * released with DB_COMMIT_FLUSH set so that it performs the + * next flush itself. + */ + if (lp->ncommit != 0) { + first = 1; + SH_TAILQ_FOREACH(commit, &lp->commits, links, __db_commit) + if (LOG_COMPARE(&lp->s_lsn, &commit->lsn) > 0) { + MUTEX_UNLOCK(env, commit->mtx_txnwait); + SH_TAILQ_REMOVE( + &lp->commits, commit, links, __db_commit); + ncommit++; + } else if (first == 1) { + F_SET(commit, DB_COMMIT_FLUSH); + MUTEX_UNLOCK(env, commit->mtx_txnwait); + SH_TAILQ_REMOVE( + &lp->commits, commit, links, __db_commit); + /* + * This thread will wake and flush. + * If another thread commits and flushes + * first we will waste a trip through the + * mutex. + */ + lp->in_flush++; + first = 0; + } + } +#ifdef HAVE_STATISTICS + if (lp->stat.st_maxcommitperflush < ncommit) + lp->stat.st_maxcommitperflush = ncommit; + if (lp->stat.st_mincommitperflush > ncommit || + lp->stat.st_mincommitperflush == 0) + lp->stat.st_mincommitperflush = ncommit; +#endif + + return (ret); +} + +/* + * __log_fill -- + * Write information into the log. + * + * Copies len bytes from addr into the log buffer, writing the buffer + * out with __log_write each time it fills. In-memory logs are copied + * straight into the ring buffer instead. Returns 0 or the error from + * __log_write. + */ +static int +__log_fill(dblp, lsn, addr, len) + DB_LOG *dblp; + DB_LSN *lsn; + void *addr; + u_int32_t len; +{ + LOG *lp; + u_int32_t bsize, nrec; + size_t nw, remain; + int ret; + + lp = dblp->reginfo.primary; + bsize = lp->buffer_size; + + if (lp->db_log_inmemory) { + __log_inmem_copyin(dblp, lp->b_off, addr, len); + lp->b_off = (lp->b_off + len) % lp->buffer_size; + return (0); + } + + while (len > 0) { /* Copy out the data. */ + /* + * If we're beginning a new buffer, note the user LSN to which + * the first byte of the buffer belongs. We have to know this + * when flushing the buffer so that we know if the in-memory + * buffer needs to be flushed. + */ + if (lp->b_off == 0) + lp->f_lsn = *lsn; + + /* + * If we're on a buffer boundary and the data is big enough, + * copy as many records as we can directly from the data. 
+ */ + if (lp->b_off == 0 && len >= bsize) { + nrec = len / bsize; + if ((ret = __log_write(dblp, addr, nrec * bsize)) != 0) + return (ret); + addr = (u_int8_t *)addr + nrec * bsize; + len -= nrec * bsize; + STAT(++lp->stat.st_wcount_fill); + continue; + } + + /* Figure out how many bytes we can copy this time. */ + remain = bsize - lp->b_off; + nw = remain > len ? len : remain; + memcpy(dblp->bufp + lp->b_off, addr, nw); + addr = (u_int8_t *)addr + nw; + len -= (u_int32_t)nw; + lp->b_off += (u_int32_t)nw; + + /* If we fill the buffer, flush it. */ + if (lp->b_off == bsize) { + if ((ret = __log_write(dblp, dblp->bufp, bsize)) != 0) + return (ret); + lp->b_off = 0; + STAT(++lp->stat.st_wcount_fill); + } + } + return (0); +} + +/* + * __log_write -- + * Write the log buffer to disk. + */ +static int +__log_write(dblp, addr, len) + DB_LOG *dblp; + void *addr; + u_int32_t len; +{ + ENV *env; + LOG *lp; + size_t nw; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + DB_ASSERT(env, !lp->db_log_inmemory); + + /* + * If we haven't opened the log file yet or the current one has + * changed, acquire a new log file. We are creating the file if we're + * about to write to the start of it, in other words, if the write + * offset is zero. + */ + if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file || + dblp->lf_timestamp != lp->timestamp) + if ((ret = __log_newfh(dblp, lp->w_off == 0)) != 0) + return (ret); + + /* + * If we're writing the first block in a log file on a filesystem that + * guarantees unwritten blocks are zero-filled, we set the size of the + * file in advance. This increases sync performance on some systems, + * because they don't need to update metadata on every sync. + * + * Ignore any error -- we may have run out of disk space, but that's no + * reason to quit. 
+ */ +#ifdef HAVE_FILESYSTEM_NOTZERO + if (lp->w_off == 0 && !__os_fs_notzero()) { +#else + if (lp->w_off == 0) { +#endif + (void)__db_file_extend(env, dblp->lfhp, lp->log_size); + if (F_ISSET(dblp, DBLOG_ZERO)) + (void)__db_zero_extend(env, dblp->lfhp, + 0, lp->log_size/lp->buffer_size, lp->buffer_size); + + } + + /* + * Seek to the offset in the file (someone may have written it + * since we last did). + */ + if ((ret = __os_io(env, DB_IO_WRITE, + dblp->lfhp, 0, 0, lp->w_off, len, addr, &nw)) != 0) + return (ret); + + /* Reset the buffer offset and update the seek offset. */ + lp->w_off += len; + + /* Update written statistics. */ + if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) { + lp->stat.st_wc_bytes -= MEGABYTE; + ++lp->stat.st_wc_mbytes; + } +#ifdef HAVE_STATISTICS + if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { + lp->stat.st_w_bytes -= MEGABYTE; + ++lp->stat.st_w_mbytes; + } + ++lp->stat.st_wcount; +#endif + + return (0); +} + +/* + * __log_file_pp -- + * ENV->log_file pre/post processing. + * + * PUBLIC: int __log_file_pp __P((DB_ENV *, const DB_LSN *, char *, size_t)); + */ +int +__log_file_pp(dbenv, lsn, namep, len) + DB_ENV *dbenv; + const DB_LSN *lsn; + char *namep; + size_t len; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret, set; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_file", DB_INIT_LOG); + + if ((ret = __log_get_config(dbenv, DB_LOG_IN_MEMORY, &set)) != 0) + return (ret); + if (set) { + __db_errx(env, DB_STR("2518", + "DB_ENV->log_file is illegal with in-memory logs")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_file(env, lsn, namep, len)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_file -- + * ENV->log_file. 
+ */ +static int +__log_file(env, lsn, namep, len) + ENV *env; + const DB_LSN *lsn; + char *namep; + size_t len; +{ + DB_LOG *dblp; + int ret; + char *name; + + dblp = env->lg_handle; + LOG_SYSTEM_LOCK(env); + ret = __log_name(dblp, lsn->file, &name, NULL, 0); + LOG_SYSTEM_UNLOCK(env); + if (ret != 0) + return (ret); + + /* Check to make sure there's enough room and copy the name. */ + if (len < strlen(name) + 1) { + *namep = '\0'; + __db_errx(env, DB_STR("2519", + "DB_ENV->log_file: name buffer is too short")); + return (EINVAL); + } + (void)strcpy(namep, name); + __os_free(env, name); + + return (0); +} + +/* + * __log_newfh -- + * Acquire a file handle for the current log file. + */ +static int +__log_newfh(dblp, create) + DB_LOG *dblp; + int create; +{ + ENV *env; + LOG *lp; + u_int32_t flags; + int ret; + logfile_validity status; + + env = dblp->env; + lp = dblp->reginfo.primary; + + /* Close any previous file descriptor. */ + if (dblp->lfhp != NULL) { + (void)__os_closehandle(env, dblp->lfhp); + dblp->lfhp = NULL; + } + + flags = DB_OSO_SEQ | + (create ? DB_OSO_CREATE : 0) | + (F_ISSET(dblp, DBLOG_DIRECT) ? DB_OSO_DIRECT : 0) | + (F_ISSET(dblp, DBLOG_DSYNC) ? DB_OSO_DSYNC : 0); + + /* Get the path of the new file and open it. */ + dblp->lfname = lp->lsn.file; + if ((ret = __log_valid(dblp, dblp->lfname, 0, &dblp->lfhp, + flags, &status, NULL)) != 0) + __db_err(env, ret, + "DB_ENV->log_newfh: %lu", (u_long)lp->lsn.file); + else if (status != DB_LV_NORMAL && status != DB_LV_INCOMPLETE && + status != DB_LV_OLD_READABLE) + ret = DB_NOTFOUND; + + return (ret); +} + +/* + * __log_name -- + * Return the log name for a particular file, and optionally open it. 
+ * + * PUBLIC: int __log_name __P((DB_LOG *, + * PUBLIC: u_int32_t, char **, DB_FH **, u_int32_t)); + */ +int +__log_name(dblp, filenumber, namep, fhpp, flags) + DB_LOG *dblp; + u_int32_t filenumber, flags; + char **namep; + DB_FH **fhpp; +{ + ENV *env; + LOG *lp; + int mode, ret; + char *oname; + char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20]; + + env = dblp->env; + lp = dblp->reginfo.primary; + + DB_ASSERT(env, !lp->db_log_inmemory); + + /* + * !!! + * The semantics of this routine are bizarre. + * + * The reason for all of this is that we need a place where we can + * intercept requests for log files, and, if appropriate, check for + * both the old-style and new-style log file names. The trick is + * that all callers of this routine that are opening the log file + * read-only want to use an old-style file name if they can't find + * a match using a new-style name. The only down-side is that some + * callers may check for the old-style when they really don't need + * to, but that shouldn't mess up anything, and we only check for + * the old-style name when we've already failed to find a new-style + * one. + * + * Create a new-style file name, and if we're not going to open the + * file, return regardless. + */ + (void)snprintf(new, sizeof(new), LFNAME, filenumber); + if ((ret = __db_appname(env, + DB_APP_LOG, new, NULL, namep)) != 0 || fhpp == NULL) + return (ret); + + /* The application may have specified an absolute file mode. */ + if (lp->filemode == 0) + mode = env->db_mode; + else { + LF_SET(DB_OSO_ABSMODE); + mode = lp->filemode; + } + + /* Open the new-style file -- if we succeed, we're done. */ + dblp->lf_timestamp = lp->timestamp; + if ((ret = __os_open(env, *namep, 0, flags, mode, fhpp)) == 0) + return (0); + + /* + * If the open failed for reason other than the file + * not being there, complain loudly, the wrong user + * probably started up the application. 
+ */ + if (ret != ENOENT) { + __db_err(env, ret, DB_STR_A("2520", + "%s: log file unreadable", "%s"), *namep); + return (__env_panic(env, ret)); + } + + /* + * The open failed... if the DB_RDONLY flag isn't set, we're done, + * the caller isn't interested in old-style files. + */ + if (!LF_ISSET(DB_OSO_RDONLY)) { + __db_err(env, ret, DB_STR_A("2521", + "%s: log file open failed", "%s"), *namep); + return (__env_panic(env, ret)); + } + + /* Create an old-style file name. */ + (void)snprintf(old, sizeof(old), LFNAME_V1, filenumber); + if ((ret = __db_appname(env, + DB_APP_LOG, old, NULL, &oname)) != 0) + goto err; + + /* + * Open the old-style file -- if we succeed, we're done. Free the + * space allocated for the new-style name and return the old-style + * name to the caller. + */ + if ((ret = __os_open(env, oname, 0, flags, mode, fhpp)) == 0) { + __os_free(env, *namep); + *namep = oname; + return (0); + } + + /* + * Couldn't find either style of name -- return the new-style name + * for the caller's error message. If it's an old-style name that's + * actually missing we're going to confuse the user with the error + * message, but that implies that not only were we looking for an + * old-style name, but we expected it to exist and we weren't just + * looking for any log file. That's not a likely error. + */ +err: __os_free(env, oname); + return (ret); +} + +/* + * __log_rep_put -- + * Short-circuit way for replication clients to put records into the + * log. Replication clients' logs need to be laid out exactly as their masters' + * are, so we let replication take responsibility for when the log gets + * flushed, when log switches files, etc. This is just a thin PUBLIC wrapper + * for __log_putr with a slightly prettier interface. + * + * Note that the REP->mtx_clientdb should be held when this is called. + * Note that we acquire the log region mutex while holding mtx_clientdb. 
+ * + * PUBLIC: int __log_rep_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t)); + */ +int +__log_rep_put(env, lsnp, rec, flags) + ENV *env; + DB_LSN *lsnp; + const DBT *rec; + u_int32_t flags; +{ + DBT *dbt, t; + DB_CIPHER *db_cipher; + DB_LOG *dblp; + HDR hdr; + LOG *lp; + int need_free, ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + memset(&hdr, 0, sizeof(HDR)); + t = *rec; + dbt = &t; + need_free = 0; + db_cipher = env->crypto_handle; + if (CRYPTO_ON(env)) + t.size += db_cipher->adj_size(rec->size); + if ((ret = __os_calloc(env, 1, t.size, &t.data)) != 0) + goto err; + need_free = 1; + memcpy(t.data, rec->data, rec->size); + + if ((ret = __log_encrypt_record(env, dbt, &hdr, rec->size)) != 0) + goto err; + + DB_ASSERT(env, LOG_COMPARE(lsnp, &lp->lsn) == 0); + ret = __log_putr(dblp, lsnp, dbt, lp->lsn.offset - lp->len, &hdr); +err: + /* + * !!! Assume caller holds REP->mtx_clientdb to modify ready_lsn. + */ + lp->ready_lsn = lp->lsn; + + if (LF_ISSET(DB_LOG_CHKPNT)) + lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; + + /* Increment count of records added to the log. */ + STAT(++lp->stat.st_record); + LOG_SYSTEM_UNLOCK(env); + if (need_free) + __os_free(env, t.data); + return (ret); +} + +static int +__log_encrypt_record(env, dbt, hdr, orig) + ENV *env; + DBT *dbt; + HDR *hdr; + u_int32_t orig; +{ + DB_CIPHER *db_cipher; + int ret; + + if (CRYPTO_ON(env)) { + db_cipher = env->crypto_handle; + hdr->size = HDR_CRYPTO_SZ; + hdr->orig_size = orig; + if ((ret = db_cipher->encrypt(env, db_cipher->data, + hdr->iv, dbt->data, dbt->size)) != 0) + return (ret); + } else { + hdr->size = HDR_NORMAL_SZ; + } + return (0); +} +/* + * __log_put_record_pp -- + * DB_ENV->log_put_record pre/post processing. 
+ * + * PUBLIC: int __log_put_record_pp __P((DB_ENV *, DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t, + * PUBLIC: DB_LOG_RECSPEC *, ...)); + */ +#ifdef STDC_HEADERS +int +__log_put_record_pp(DB_ENV *dbenv, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, + u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size, + DB_LOG_RECSPEC *spec, ...) +#else +int +__log_put_record_pp(dbenv, dbp, txnp, ret_lsnp, + flags, rectype, has_data, size, + spec, va_alist) + DB_ENV *dbenv; + DB *dbp; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t rectype; + u_int32_t has_data; + u_int32_t size; + DB_LOG_RECSPEC *spec; + va_dcl +#endif +{ + DB_THREAD_INFO *ip; + ENV *env; + va_list argp; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_put_record", DB_INIT_LOG); + + /* Validate arguments: check for allowed flags. */ + if ((ret = __db_fchk(env, "DB_ENV->log_put_record", flags, + DB_LOG_CHKPNT | DB_LOG_COMMIT | + DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_WRNOSYNC)) != 0) + return (ret); + + /* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */ + if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH)) + return (__db_ferr(env, "DB_ENV->log_put_record", 1)); + + /* Replication clients should never write log records. 
*/ + if (IS_REP_CLIENT(env)) { + __db_errx(env, DB_STR("2522", + "DB_ENV->log_put is illegal on replication clients")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + va_start(argp, spec); + REPLICATION_WRAP(env, (__log_put_record_int(env, dbp, + txnp, ret_lsnp, flags, rectype, has_data, size, spec, argp)), + 0, ret); + va_end(argp); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * PUBLIC: int __log_put_record __P((ENV *, DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t, + * PUBLIC: DB_LOG_RECSPEC *, ...)); + */ +#ifdef STDC_HEADERS +int +__log_put_record(ENV *env, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, + u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size, + DB_LOG_RECSPEC *spec, ...) +#else +int +__log_put_record(env, dbp, txnp, ret_lsnp, + flags, rectype, has_data, size, spec, va_alist); + ENV *env; + DB *dbp; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t rectype; + u_int32_t has_data; + u_int32_t size; + DB_LOG_RECSPEC *spec; + va_dcl +#endif +{ + va_list argp; + int ret; + + va_start(argp, spec); + ret = __log_put_record_int(env, dbp, txnp, ret_lsnp, flags, + rectype, has_data, size, spec, argp); + va_end(argp); + return (ret); +} + +#ifdef STDC_HEADERS +static int +__log_put_record_int(ENV *env, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, + u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size, + DB_LOG_RECSPEC *spec, va_list argp) +#else +int +__log_put_record_int(env, dbp, txnp, ret_lsnp, + flags, rectype, has_data, size, spec, argp); + ENV *env; + DB *dbp; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t has_data; + u_int32_t size; + u_int32_t rectype; + DB_LOG_RECSPEC *spec; + va_list argp; +#endif +{ + DBT *data, *dbt, *header, logrec; + DB_LOG_RECSPEC *sp; + DB_LSN *lsnp, lsn, null_lsn, *pagelsn, *rlsnp; + DB_TXNLOGREC *lr; + LOG *lp; + PAGE *pghdrstart; + u_int32_t hdrsize, op, zero, uinttmp, txn_num; + u_int npad; + u_int8_t *bp; + int 
is_durable, ret; + void *hdrstart; + + COMPQUIET(lr, NULL); + COMPQUIET(hdrsize, 0); + COMPQUIET(op, 0); + COMPQUIET(hdrstart, NULL); + COMPQUIET(pghdrstart, NULL); + COMPQUIET(header, NULL); + + /* + * rlsnp will be stored into while holding the log system lock. + * If this is a commit record then ret_lsnp will be the address of + * the transaction detail visible_lsn field. If not then this + * may be the lsn of a page and we do not want to set it if + * the log_put fails after writing the record (due to an I/O error). + */ + if (LF_ISSET(DB_LOG_COMMIT)) + rlsnp = ret_lsnp; + else + rlsnp = &lsn; + npad = 0; + ret = 0; + data = NULL; + + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + (dbp != NULL && F_ISSET(dbp, DB_AM_NOT_DURABLE))) { + if (txnp == NULL) + return (0); + is_durable = 0; + } else + is_durable = 1; + + if (txnp == NULL) { + txn_num = 0; + lsnp = &null_lsn; + null_lsn.file = null_lsn.offset = 0; + } else { + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(env, rectype, txnp)) != 0) + return (ret); + /* + * We need to assign begin_lsn while holding region mutex. + * That assignment is done inside the DbEnv->log_put call, + * so pass in the appropriate memory location to be filled + * in by the log_put code. 
+ */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; + } + + if (dbp != NULL) { + DB_ASSERT(env, dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) + return (ret); + } + + logrec.size = size; + + if (CRYPTO_ON(env)) { + npad = env->crypto_handle->adj_size(logrec.size); + logrec.size += npad; + } + + if (is_durable || txnp == NULL) { + if ((ret = + __os_malloc(env, logrec.size, &logrec.data)) != 0) + return (ret); + } else { + if ((ret = __os_malloc(env, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + if ((ret = + __os_malloc(env, logrec.size, &logrec.data)) != 0) { + __os_free(env, lr); + return (ret); + } +#else + logrec.data = lr->data; +#endif + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); + + bp = logrec.data; + + LOGCOPY_32(env, bp, &rectype); + bp += sizeof(rectype); + + LOGCOPY_32(env, bp, &txn_num); + bp += sizeof(txn_num); + + LOGCOPY_FROMLSN(env, bp, lsnp); + bp += sizeof(DB_LSN); + + zero = 0; + lp = env->lg_handle->reginfo.primary; + for (sp = spec; sp->type != LOGREC_Done; sp++) { + switch (sp->type) { + case LOGREC_DB: + /* This is not in the varargs. 
*/ + uinttmp = (u_int32_t)dbp->log_filename->id; + LOGCOPY_32(env, bp, &uinttmp); + bp += sizeof(uinttmp); + break; + + case LOGREC_ARG: + case LOGREC_TIME: + case LOGREC_DBOP: + uinttmp = va_arg(argp, u_int32_t); + LOGCOPY_32(env, bp, &uinttmp); + bp += sizeof(uinttmp); + break; + case LOGREC_OP: + op = va_arg(argp, u_int32_t); + LOGCOPY_32(env, bp, &op); + bp += sizeof(uinttmp); + break; + case LOGREC_DBT: + case LOGREC_PGLIST: + case LOGREC_LOCKS: + case LOGREC_HDR: + case LOGREC_DATA: + dbt = va_arg(argp, DBT *); + if (dbt == NULL) { + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + LOGCOPY_32(env, bp, &dbt->size); + bp += sizeof(dbt->size); + memcpy(bp, dbt->data, dbt->size); + } + /* Process fields that need to be byte swapped. */ + if (dbp != NULL && F_ISSET(dbp, DB_AM_SWAP)) { + if (sp->type == LOGREC_HDR && + dbt != NULL && has_data == 0) + __db_recordswap(op, + dbt->size, bp, NULL, 0); + else if (sp->type == LOGREC_HDR) { + hdrstart = bp; + hdrsize = dbt == NULL ? 0 : dbt->size; + } else if (sp->type == LOGREC_DATA) { + __db_recordswap(op, + hdrsize, hdrstart, bp, 0); + has_data = 0; + } + } + if (dbt != NULL) + bp += dbt->size; + + break; + /* + * Page header and data -- we assume that the header + * is listed first and the data follows sometime later. + * There should be only one header/data pair per record. 
+ */ + case LOGREC_PGDBT: + header = va_arg(argp, DBT *); + if (header == NULL) { + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + LOGCOPY_32(env, bp, &header->size); + bp += sizeof(header->size); + pghdrstart = (PAGE *)bp; + memcpy(bp, header->data, header->size); + if (has_data == 0 && + F_ISSET(dbp, DB_AM_SWAP) && + (ret = __db_pageswap( + env, dbp, pghdrstart, (size_t)header->size, + NULL, 0)) != 0) + return (ret); + bp += header->size; + } + break; + + case LOGREC_PGDDBT: + data = va_arg(argp, DBT *); + if (data == NULL) { + zero = 0; + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + if (F_ISSET(dbp, DB_AM_SWAP) && + (ret = __db_pageswap(env, dbp, pghdrstart, + (size_t)header->size, (DBT *)data, 0)) != 0) + return (ret); + LOGCOPY_32(env, bp, &data->size); + bp += sizeof(data->size); + memcpy(bp, data->data, data->size); + if (F_ISSET(dbp, DB_AM_SWAP) && + F_ISSET(data, DB_DBT_APPMALLOC)) + __os_free(env, data->data); + bp += data->size; + } + break; + case LOGREC_POINTER: + pagelsn = va_arg(argp, DB_LSN *); + if (pagelsn != NULL) { + if (txnp != NULL) { + if (LOG_COMPARE(pagelsn, + &lp->lsn) >= 0 && (ret = + __log_check_page_lsn(env, + dbp, pagelsn)) != 0) + return (ret); + } + LOGCOPY_FROMLSN(env, bp, pagelsn); + } else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + break; + + default: + DB_ASSERT(env, sp->type != sp->type); + } + } + + DB_ASSERT(env, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + + if (is_durable || txnp == NULL) { + if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, + flags | DB_LOG_NOCOPY)) == 0) { + if (txnp != NULL) + *lsnp = *rlsnp; + *ret_lsnp = *rlsnp; + } + } else { + ret = 0; +#ifdef DIAGNOSTIC + /* + * Set the debug bit if we are going to log non-durable + * transactions so they will be ignored by recovery. 
+ */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + LOGCOPY_32(env, logrec.data, &rectype); + + if (!IS_REP_CLIENT(env) && !lp->db_log_inmemory) + ret = __log_put(env, + rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); +#endif + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); + LSN_NOT_LOGGED(*ret_lsnp); + } + +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__db_addrem_print(env, + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); +#endif + +#ifdef DIAGNOSTIC + __os_free(env, logrec.data); +#else + if (is_durable || txnp == NULL) + __os_free(env, logrec.data); +#endif + return (ret); +} diff --git a/src/log/log_stat.c b/src/log/log_stat.c new file mode 100644 index 00000000..37b74c74 --- /dev/null +++ b/src/log/log_stat.c @@ -0,0 +1,336 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifdef HAVE_STATISTICS +static int __log_print_all __P((ENV *, u_int32_t)); +static int __log_print_stats __P((ENV *, u_int32_t)); +static int __log_stat __P((ENV *, DB_LOG_STAT **, u_int32_t)); + +/* + * __log_stat_pp -- + * DB_ENV->log_stat pre/post processing. + * + * PUBLIC: int __log_stat_pp __P((DB_ENV *, DB_LOG_STAT **, u_int32_t)); + */ +int +__log_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_stat", DB_INIT_LOG); + + if ((ret = __db_fchk(env, + "DB_ENV->log_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_stat(env, statp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_stat -- + * DB_ENV->log_stat. 
+ */ +static int +__log_stat(env, statp, flags) + ENV *env; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LOG_STAT *stats; + LOG *lp; + int ret; + + *statp = NULL; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if ((ret = __os_umalloc(env, sizeof(DB_LOG_STAT), &stats)) != 0) + return (ret); + + /* Copy out the global statistics. */ + LOG_SYSTEM_LOCK(env); + *stats = lp->stat; + if (LF_ISSET(DB_STAT_CLEAR)) + memset(&lp->stat, 0, sizeof(lp->stat)); + + stats->st_magic = lp->persist.magic; + stats->st_version = lp->persist.version; + stats->st_mode = lp->filemode; + stats->st_lg_bsize = lp->buffer_size; + stats->st_lg_size = lp->log_nsize; + + __mutex_set_wait_info(env, lp->mtx_region, + &stats->st_region_wait, &stats->st_region_nowait); + if (LF_ISSET(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM) == DB_STAT_CLEAR) + __mutex_clear(env, lp->mtx_region); + stats->st_regsize = dblp->reginfo.rp->size; + + stats->st_cur_file = lp->lsn.file; + stats->st_cur_offset = lp->lsn.offset; + stats->st_disk_file = lp->s_lsn.file; + stats->st_disk_offset = lp->s_lsn.offset; + + LOG_SYSTEM_UNLOCK(env); + + *statp = stats; + return (0); +} + +/* + * __log_stat_print_pp -- + * DB_ENV->log_stat_print pre/post processing. + * + * PUBLIC: int __log_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__log_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_stat_print", DB_INIT_LOG); + + if ((ret = __db_fchk(env, "DB_ENV->log_stat_print", + flags, DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_stat_print -- + * DB_ENV->log_stat_print method. 
+ * + * PUBLIC: int __log_stat_print __P((ENV *, u_int32_t)); + */ +int +__log_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __log_print_stats(env, orig_flags); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __log_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +/* + * __log_print_stats -- + * Display default log region statistics. + */ +static int +__log_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_LOG_STAT *sp; + int ret; + + if ((ret = __log_stat(env, &sp, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default logging region information:"); + STAT_HEX("Log magic number", sp->st_magic); + STAT_ULONG("Log version number", sp->st_version); + __db_dlbytes(env, "Log record cache size", + (u_long)0, (u_long)0, (u_long)sp->st_lg_bsize); + __db_msg(env, "%#o\tLog file mode", sp->st_mode); + if (sp->st_lg_size % MEGABYTE == 0) + __db_msg(env, "%luMb\tCurrent log file size", + (u_long)sp->st_lg_size / MEGABYTE); + else if (sp->st_lg_size % 1024 == 0) + __db_msg(env, "%luKb\tCurrent log file size", + (u_long)sp->st_lg_size / 1024); + else + __db_msg(env, "%lu\tCurrent log file size", + (u_long)sp->st_lg_size); + __db_dl(env, "Initial fileid allocation", (u_long)sp->st_fileid_init); + __db_dl(env, "Current fileids in use", (u_long)sp->st_nfileid); + __db_dl(env, "Maximum fileids used", (u_long)sp->st_maxnfileid); + __db_dl(env, "Records entered into the log", (u_long)sp->st_record); + __db_dlbytes(env, "Log bytes written", + (u_long)0, (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes); + __db_dlbytes(env, "Log bytes written since last checkpoint", + (u_long)0, (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes); + __db_dl(env, "Total log file I/O writes", (u_long)sp->st_wcount); + __db_dl(env, 
"Total log file I/O writes due to overflow", + (u_long)sp->st_wcount_fill); + __db_dl(env, "Total log file flushes", (u_long)sp->st_scount); + __db_dl(env, "Total log file I/O reads", (u_long)sp->st_rcount); + STAT_ULONG("Current log file number", sp->st_cur_file); + STAT_ULONG("Current log file offset", sp->st_cur_offset); + STAT_ULONG("On-disk log file number", sp->st_disk_file); + STAT_ULONG("On-disk log file offset", sp->st_disk_offset); + + __db_dl(env, + "Maximum commits in a log flush", (u_long)sp->st_maxcommitperflush); + __db_dl(env, + "Minimum commits in a log flush", (u_long)sp->st_mincommitperflush); + + __db_dlbytes(env, "Region size", + (u_long)0, (u_long)0, (u_long)sp->st_regsize); + __db_dl_pct(env, + "The number of region locks that required waiting", + (u_long)sp->st_region_wait, DB_PCT(sp->st_region_wait, + sp->st_region_wait + sp->st_region_nowait), NULL); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __log_print_all -- + * Display debugging log region statistics. 
+ */ +static int +__log_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN fn[] = { + { DBLOG_RECOVER, "DBLOG_RECOVER" }, + { DBLOG_FORCE_OPEN, "DBLOG_FORCE_OPEN" }, + { DBLOG_AUTOREMOVE, "DBLOG_AUTOREMOVE"}, + { DBLOG_DIRECT, "DBLOG_DIRECT"}, + { DBLOG_DSYNC, "DBLOG_DSYNC"}, + { DBLOG_FORCE_OPEN, "DBLOG_FORCE_OPEN"}, + { DBLOG_INMEMORY, "DBLOG_INMEMORY"}, + { DBLOG_OPENFILES, "DBLOG_OPENFILES"}, + { DBLOG_RECOVER, "DBLOG_RECOVER"}, + { DBLOG_ZERO, "DBLOG_ZERO"}, + { 0, NULL } + }; + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + + __db_print_reginfo(env, &dblp->reginfo, "Log", flags); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_LOG handle information:"); + __mutex_print_debug_single( + env, "DB_LOG handle mutex", dblp->mtx_dbreg, flags); + STAT_ULONG("Log file name", dblp->lfname); + __db_print_fh(env, "Log file handle", dblp->lfhp, flags); + __db_prflags(env, NULL, dblp->flags, fn, NULL, "\tFlags"); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "LOG handle information:"); + __mutex_print_debug_single( + env, "LOG region mutex", lp->mtx_region, flags); + __mutex_print_debug_single( + env, "File name list mutex", lp->mtx_filelist, flags); + + STAT_HEX("persist.magic", lp->persist.magic); + STAT_ULONG("persist.version", lp->persist.version); + __db_dlbytes(env, + "persist.log_size", (u_long)0, (u_long)0, lp->persist.log_size); + STAT_FMT("log file permissions mode", "%#lo", u_long, lp->filemode); + STAT_LSN("current file offset LSN", &lp->lsn); + STAT_LSN("first buffer byte LSN", &lp->lsn); + STAT_ULONG("current buffer offset", lp->b_off); + STAT_ULONG("current file write offset", lp->w_off); + STAT_ULONG("length of last record", lp->len); + STAT_LONG("log flush in progress", lp->in_flush); + __mutex_print_debug_single( + env, "Log flush mutex", lp->mtx_flush, flags); + + STAT_LSN("last sync LSN", &lp->s_lsn); + + /* + * Don't display the 
replication fields here, they're displayed as part + * of the replication statistics. + */ + + STAT_LSN("cached checkpoint LSN", &lp->cached_ckp_lsn); + + __db_dlbytes(env, + "log buffer size", (u_long)0, (u_long)0, lp->buffer_size); + __db_dlbytes(env, + "log file size", (u_long)0, (u_long)0, lp->log_size); + __db_dlbytes(env, + "next log file size", (u_long)0, (u_long)0, lp->log_nsize); + + STAT_ULONG("transactions waiting to commit", lp->ncommit); + STAT_LSN("LSN of first commit", &lp->t_lsn); + + LOG_SYSTEM_UNLOCK(env); + + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__log_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__log_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/log/log_verify.c b/src/log/log_verify.c new file mode 100644 index 00000000..e7f8f688 --- /dev/null +++ b/src/log/log_verify.c @@ -0,0 +1,437 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#include "dbinc/log_verify.h" + +#define FIRST_OFFSET(env) \ + (sizeof(LOGP) + (CRYPTO_ON(env) ? 
HDR_CRYPTO_SZ : HDR_NORMAL_SZ)) + +static int __env_init_verify __P((ENV *, u_int32_t, DB_DISTAB *)); + +/* + * PUBLIC: int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); + */ +int +__log_verify_pp(dbenv, lvconfig) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; +{ + int lsnrg, ret, timerg; + DB_THREAD_INFO *ip; + const char *phome; + + lsnrg = ret = timerg = 0; + phome = NULL; + + if (!IS_ZERO_LSN(lvconfig->start_lsn) || + !IS_ZERO_LSN(lvconfig->end_lsn)) + lsnrg = 1; + if (lvconfig->start_time != 0 || lvconfig->end_time != 0) + timerg = 1; + + if ((!IS_ZERO_LSN(lvconfig->start_lsn) && lvconfig->start_time != 0) || + (!IS_ZERO_LSN(lvconfig->end_lsn) && lvconfig->end_time != 0) || + (lsnrg && timerg)) { + __db_errx(dbenv->env, DB_STR("2501", + "Set either an lsn range or a time range to verify logs " + "in the range, don't mix time and lsn.")); + ret = EINVAL; + goto err; + } + phome = dbenv->env->db_home; + if (phome != NULL && lvconfig->temp_envhome != NULL && + strcmp(phome, lvconfig->temp_envhome) == 0) { + __db_errx(dbenv->env, + "Environment home for log verification internal use " + "overlaps with that of the environment to verify."); + ret = EINVAL; + goto err; + } + + ENV_ENTER(dbenv->env, ip); + ret = __log_verify(dbenv, lvconfig, ip); + ENV_LEAVE(dbenv->env, ip); +err: return (ret); +} + +/* + * PUBLIC: int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *, + * PUBLIC: DB_THREAD_INFO *)); + */ +int +__log_verify(dbenv, lvconfig, ip) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; + DB_THREAD_INFO *ip; +{ + + u_int32_t logcflag, max_fileno; + DB_LOGC *logc; + ENV *env; + DBT data; + DB_DISTAB dtab; + DB_LSN key, start, start2, stop, stop2, verslsn; + u_int32_t newversion, version; + int cmp, fwdscroll, goprev, ret, tret; + time_t starttime, endtime; + const char *okmsg; + DB_LOG_VRFY_INFO *logvrfy_hdl; + + okmsg = NULL; + fwdscroll = 1; + max_fileno = (u_int32_t)-1; + goprev = 0; + env = dbenv->env; + logc = NULL; 
+ memset(&dtab, 0, sizeof(dtab)); + memset(&data, 0, sizeof(data)); + version = newversion = 0; + ZERO_LSN(verslsn); + memset(&start, 0, sizeof(DB_LSN)); + memset(&start2, 0, sizeof(DB_LSN)); + memset(&stop, 0, sizeof(DB_LSN)); + memset(&stop2, 0, sizeof(DB_LSN)); + memset(&key, 0, sizeof(DB_LSN)); + memset(&verslsn, 0, sizeof(DB_LSN)); + + start = lvconfig->start_lsn; + stop = lvconfig->end_lsn; + starttime = lvconfig->start_time; + endtime = lvconfig->end_time; + + if ((ret = __create_log_vrfy_info(lvconfig, &logvrfy_hdl, ip)) != 0) + goto err; + logvrfy_hdl->lv_config = lvconfig; + if (lvconfig->continue_after_fail) + F_SET(logvrfy_hdl, DB_LOG_VERIFY_CAF); + if (lvconfig->verbose) + F_SET(logvrfy_hdl, DB_LOG_VERIFY_VERBOSE); + + /* Allocate a log cursor. */ + if ((ret = __log_cursor(dbenv->env, &logc)) != 0) { + __db_err(dbenv->env, ret, "DB_ENV->log_cursor"); + goto err; + } + /* Ignore failed chksum and go on with next one. */ + F_SET(logc->env->lg_handle, DBLOG_VERIFYING); + + /* Only scan the range that we want to verify. */ + if (fwdscroll) { + if (IS_ZERO_LSN(stop)) { + logcflag = DB_LAST; + key.file = key.offset = 0; + } else { + key = stop; + logcflag = DB_SET; + } + logvrfy_hdl->flags |= DB_LOG_VERIFY_FORWARD; + goto startscroll; + } + +vrfyscroll: + + /* + * Initialize version to 0 so that we get the + * correct version right away. + */ + version = 0; + ZERO_LSN(verslsn); + + /* + * In the log verification config struct, start_lsn and end_lsn have + * higher priority than start_time and end_time, and you can specify + * either lsn or time to start/stop verification. 
+ */ + if (starttime != 0 || endtime != 0) { + if ((ret = __find_lsnrg_by_timerg(logvrfy_hdl, + starttime, endtime, &start2, &stop2)) != 0) + goto err; + ((DB_LOG_VERIFY_CONFIG *)lvconfig)->start_lsn = start = start2; + ((DB_LOG_VERIFY_CONFIG *)lvconfig)->end_lsn = stop = stop2; + } + + if (IS_ZERO_LSN(start)) { + logcflag = DB_FIRST; + key.file = key.offset = 0; + } else { + key = start; + logcflag = DB_SET; + F_SET(logvrfy_hdl, DB_LOG_VERIFY_PARTIAL); + } + goprev = 0; + + /* + * So far we only support verifying a specific db file. The config's + * dbfile must be prefixed with the data directory if it's not in + * environment home directory. + */ + if (lvconfig->dbfile != NULL) { + F_SET(logvrfy_hdl, + DB_LOG_VERIFY_DBFILE | DB_LOG_VERIFY_PARTIAL); + if ((ret = __set_logvrfy_dbfuid(logvrfy_hdl)) != 0) + goto err; + } + +startscroll: + + memset(&data, 0, sizeof(data)); + + for (;;) { + + /* + * We may have reached beyond the range we're verifying. + */ + if (!fwdscroll && !IS_ZERO_LSN(stop)) { + cmp = LOG_COMPARE(&key, &stop); + if (cmp > 0) + break; + } + if (fwdscroll && !IS_ZERO_LSN(start)) { + cmp = LOG_COMPARE(&key, &start); + if (cmp < 0) + break; + } + + ret = __logc_get(logc, &key, &data, logcflag); + if (ret != 0) { + if (ret == DB_NOTFOUND) { + /* We may not start from the first log file. */ + if (logcflag == DB_PREV && key.file > 1) + F_SET(logvrfy_hdl, + DB_LOG_VERIFY_PARTIAL); + break; + } + __db_err(dbenv->env, ret, "DB_LOGC->get"); + /* + * When go beyond valid lsn range, we may get other + * error values than DB_NOTFOUND. + */ + goto out; + } + + if (logcflag == DB_SET) { + if (goprev) + logcflag = DB_PREV; + else + logcflag = DB_NEXT; + } else if (logcflag == DB_LAST) { + logcflag = DB_PREV; + max_fileno = key.file; + } else if (logcflag == DB_FIRST) + logcflag = DB_NEXT; + + if (key.file != verslsn.file) { + /* + * If our log file changed, we need to see if the + * version of the log file changed as well. + * If it changed, reset the print table. 
+ */ + if ((ret = __logc_version(logc, &newversion)) != 0) { + __db_err(dbenv->env, ret, "DB_LOGC->version"); + goto err; + } + if (version != newversion) { + version = newversion; + if (!IS_LOG_VRFY_SUPPORTED(version)) { + __db_msg(dbenv->env, DB_STR_A("2502", + "[%lu][%lu] Unsupported version of log file, " + "log file number: %u, log file version: %u, " + "supported log version: %u.", + "%lu %lu %u %u %u"), + (u_long)key.file, + (u_long)key.offset, + key.file, version, DB_LOGVERSION); + if (logcflag == DB_NEXT) { + key.file += 1; + if (key.file > max_fileno) + break; + /* + * Txns don't span log versions, no need to + * set DB_LOG_VERIFY_PARTIAL here. + */ + } else { + goprev = 1; + key.file -= 1; + if (key.file == 0) + break; + } + key.offset = FIRST_OFFSET(env); + logcflag = DB_SET; + continue; + } + if ((ret = __env_init_verify(env, version, + &dtab)) != 0) { + __db_err(dbenv->env, ret, + DB_STR("2503", + "callback: initialization")); + goto err; + } + } + verslsn = key; + } + + ret = __db_dispatch(dbenv->env, &dtab, &data, &key, + DB_TXN_LOG_VERIFY, logvrfy_hdl); + + if (!fwdscroll && ret != 0) { + if (!F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_CAF)) { + __db_err(dbenv->env, ret, + "[%lu][%lu] __db_dispatch", + (u_long)key.file, (u_long)key.offset); + goto err; + } else + F_SET(logvrfy_hdl, DB_LOG_VERIFY_ERR); + } + } + + if (fwdscroll) { + fwdscroll = 0; + F_CLR(logvrfy_hdl, DB_LOG_VERIFY_FORWARD); + goto vrfyscroll; + } +out: + /* + * When we arrive here ret can be 0 or errors returned by DB_LOGC->get, + * all which we have already handled. So we clear ret. + */ + ret = 0; + + /* If continuing after fail, we can complete the entire log. */ + if (F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_ERR) || + F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_INTERR)) + ret = DB_LOG_VERIFY_BAD; + /* + * This function can be called when the environment is alive, so + * there can be active transactions. 
+ */ + __db_log_verify_global_report(logvrfy_hdl); + if (ret == DB_LOG_VERIFY_BAD) + okmsg = DB_STR_P("FAILED"); + else { + DB_ASSERT(dbenv->env, ret == 0); + okmsg = DB_STR_P("SUCCEEDED"); + } + + __db_msg(dbenv->env, DB_STR_A("2504", + "Log verification ended and %s.", "%s"), okmsg); + +err: + if (logc != NULL) + (void)__logc_close(logc); + if ((tret = __destroy_log_vrfy_info(logvrfy_hdl)) != 0 && ret == 0) + ret = tret; + if (dtab.int_dispatch) + __os_free(dbenv->env, dtab.int_dispatch); + if (dtab.ext_dispatch) + __os_free(dbenv->env, dtab.ext_dispatch); + + return (ret); +} + +/* + * __env_init_verify-- + */ +static int +__env_init_verify(env, version, dtabp) + ENV *env; + u_int32_t version; + DB_DISTAB *dtabp; +{ + int ret; + + /* + * We need to prime the print table with the current print + * functions. Then we overwrite only specific entries based on + * each previous version we support. + */ + if ((ret = __bam_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __crdel_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __db_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __dbreg_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __fop_init_verify(env, dtabp)) != 0) + goto err; +#ifdef HAVE_HASH + if ((ret = __ham_init_verify(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_HEAP + if ((ret = __heap_init_verify(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_QUEUE + if ((ret = __qam_init_verify(env, dtabp)) != 0) + goto err; +#endif + if ((ret = __txn_init_verify(env, dtabp)) != 0) + goto err; + + switch (version) { + case DB_LOGVERSION: + ret = 0; + break; + + default: + __db_errx(env, DB_STR_A("2505", "Not supported version %lu", + "%lu"), (u_long)version); + ret = EINVAL; + break; + } +err: return (ret); +} + +/* + * __log_verify_wrap -- + * Wrapper function for APIs of other languages, like java/c# and + * script languages. It's much easier to implement the swig layer + * when we split up the C structure. 
+ * + * PUBLIC: int __log_verify_wrap __P((ENV *, const char *, u_int32_t, + * PUBLIC: const char *, const char *, time_t, time_t, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, int, int)); + */ +int +__log_verify_wrap(env, envhome, cachesize, dbfile, dbname, + stime, etime, stfile, stoffset, efile, eoffset, caf, verbose) + ENV *env; + const char *envhome, *dbfile, *dbname; + time_t stime, etime; + u_int32_t cachesize, stfile, stoffset, efile, eoffset; + int caf, verbose; +{ + DB_LOG_VERIFY_CONFIG cfg; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cachesize = cachesize; + cfg.temp_envhome = envhome; + cfg.dbfile = dbfile; + cfg.dbname = dbname; + cfg.start_time = stime; + cfg.end_time = etime; + cfg.start_lsn.file = stfile; + cfg.start_lsn.offset = stoffset; + cfg.end_lsn.file = efile; + cfg.end_lsn.offset = eoffset; + cfg.continue_after_fail = caf; + cfg.verbose = verbose; + + return __log_verify_pp(env->dbenv, &cfg); +} diff --git a/src/log/log_verify_auto.c b/src/log/log_verify_auto.c new file mode 100644 index 00000000..08bc5d64 --- /dev/null +++ b/src/log/log_verify_auto.c @@ -0,0 +1,318 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" +#include "dbinc/fop.h" + +/* + * PUBLIC: int __crdel_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__crdel_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_metasub_verify, DB___crdel_metasub)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_create_verify, DB___crdel_inmem_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_rename_verify, DB___crdel_inmem_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_remove_verify, DB___crdel_inmem_remove)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __db_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__db_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __db_addrem_verify, DB___db_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_big_verify, DB___db_big)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_ovref_verify, DB___db_ovref)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_debug_verify, DB___db_debug)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_noop_verify, DB___db_noop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_alloc_verify, DB___db_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_free_verify, DB___db_pg_free)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_cksum_verify, DB___db_cksum)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, 
dtabp, + __db_pg_freedata_verify, DB___db_pg_freedata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_init_verify, DB___db_pg_init)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_trunc_verify, DB___db_pg_trunc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_realloc_verify, DB___db_realloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_relink_verify, DB___db_relink)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_merge_verify, DB___db_merge)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pgno_verify, DB___db_pgno)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __dbreg_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__dbreg_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __dbreg_register_verify, DB___dbreg_register)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __bam_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__bam_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_split_verify, DB___bam_split)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rsplit_verify, DB___bam_rsplit)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_adj_verify, DB___bam_adj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cadjust_verify, DB___bam_cadjust)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cdel_verify, DB___bam_cdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_repl_verify, DB___bam_repl)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_root_verify, DB___bam_root)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + 
__bam_curadj_verify, DB___bam_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rcuradj_verify, DB___bam_rcuradj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_irep_verify, DB___bam_irep)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __fop_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__fop_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_create_verify, DB___fop_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_remove_verify, DB___fop_remove)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_verify, DB___fop_write)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_verify, DB___fop_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_verify, DB___fop_rename_noundo)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_file_remove_verify, DB___fop_file_remove)) != 0) + return (ret); + return (0); +} + +#ifdef HAVE_HASH +/* + * PUBLIC: int __ham_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__ham_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_insdel_verify, DB___ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_newpage_verify, DB___ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_splitdata_verify, DB___ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_replace_verify, DB___ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_copypage_verify, DB___ham_copypage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_metagroup_verify, DB___ham_metagroup)) != 0) + return (ret); + 
if ((ret = __db_add_recovery_int(env, dtabp, + __ham_groupalloc_verify, DB___ham_groupalloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_changeslot_verify, DB___ham_changeslot)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_contract_verify, DB___ham_contract)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_curadj_verify, DB___ham_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_chgpg_verify, DB___ham_chgpg)) != 0) + return (ret); + return (0); +} + +#endif /* HAVE_HASH */ +#ifdef HAVE_HEAP +/* + * PUBLIC: int __heap_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__heap_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_addrem_verify, DB___heap_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_pg_alloc_verify, DB___heap_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_meta_verify, DB___heap_trunc_meta)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_page_verify, DB___heap_trunc_page)) != 0) + return (ret); + return (0); +} +#endif /* HAVE_HEAP */ +#ifdef HAVE_QUEUE +/* + * PUBLIC: int __qam_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__qam_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_incfirst_verify, DB___qam_incfirst)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_mvptr_verify, DB___qam_mvptr)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_del_verify, DB___qam_del)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_add_verify, DB___qam_add)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_delext_verify, DB___qam_delext)) != 0) + return 
(ret); + return (0); +} + +#endif /* HAVE_QUEUE */ +/* + * PUBLIC: int __txn_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__txn_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_regop_verify, DB___txn_regop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_ckp_verify, DB___txn_ckp)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_child_verify, DB___txn_child)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_prepare_verify, DB___txn_prepare)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_recycle_verify, DB___txn_recycle)) != 0) + return (ret); + return (0); +} diff --git a/src/log/log_verify_int.c b/src/log/log_verify_int.c new file mode 100644 index 00000000..abe564c6 --- /dev/null +++ b/src/log/log_verify_int.c @@ -0,0 +1,4353 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * This file contains verification functions for all types of log records, + * one for each type. We can't make this automated like the log_type_print/read + * functions because there are no consistent handling. Each type of log records + * have unique ways to verify, and unique information to extract. + * + * In each verification function, we first call the log_type_read function + * to get the log_type_args structure, then extract information according to + * the type of log. The log types can be made into different categories, each + * of which have similar types of information. 
+ * + * For example, txn_regop and txn_ckp types both have timestamps, and we + * want to maintain (timestamp,lsn) mapping, so we will have a on_timestamp + * function, and call it in txn_regop_verify and txn_ckp_verify functions, + * and in the two functions we may call other on_*** functions to extract and + * verify other information. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#include "dbinc/log_verify.h" + +static int __log_vrfy_proc __P((DB_LOG_VRFY_INFO *, DB_LSN, DB_LSN, + u_int32_t, DB_TXN *, int32_t, int *)); +static int __lv_ckp_vrfy_handler __P((DB_LOG_VRFY_INFO *, + VRFY_TXN_INFO *, void *)); +static const char *__lv_dbreg_str __P((u_int32_t)); +static int __lv_dbregid_to_dbtype __P((DB_LOG_VRFY_INFO *, int32_t, DBTYPE *)); +static int __lv_dbt_str __P((const DBT *, char **)); +static const char *__lv_dbtype_str __P((DBTYPE)); +static u_int32_t __lv_first_offset __P((ENV *)); +static int __lv_new_logfile_vrfy __P((DB_LOG_VRFY_INFO *, const DB_LSN *)); +static int __lv_log_fwdscr_oncmt __P((DB_LOG_VRFY_INFO *, DB_LSN, + u_int32_t, u_int32_t, int32_t)); +static int __lv_log_fwdscr_onrec __P((DB_LOG_VRFY_INFO *, + u_int32_t, u_int32_t, DB_LSN, DB_LSN)); +static int __lv_log_mismatch __P((DB_LOG_VRFY_INFO *, DB_LSN, DBTYPE, DBTYPE)); +static int __lv_on_bam_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_ham_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_heap_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_new_txn __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + const DB_TXN *, u_int32_t, int32_t, const DBT *)); +static int __lv_on_nontxn_update __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + u_int32_t, u_int32_t, int32_t)); +static int __lv_on_page_update __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t, + db_pgno_t, DB_TXN *, 
int *)); +static int __lv_on_qam_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_timestamp __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + int32_t, u_int32_t)); +static int __lv_on_txn_aborted __P((DB_LOG_VRFY_INFO *)); +static int __lv_on_txn_logrec __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + const DB_LSN *, const DB_TXN *, u_int32_t, int32_t)); +static int __lv_vrfy_for_dbfile __P((DB_LOG_VRFY_INFO *, int32_t, int *)); + +/* General error handlers, called when a check fails. */ +#define ON_ERROR(lvh, errv) do { \ + (lvh)->flags |= (errv); \ + if (F_ISSET((lvh), DB_LOG_VERIFY_CAF)) \ + ret = 0;/* Ignore the error and continue. */ \ + goto err; \ +} while (0) + +/* Used by logs of unsupported types. */ +#define ON_NOT_SUPPORTED(env, lvh, lsn, ltype) do { \ + __db_errx((env), DB_STR_A("2536", \ + "[%lu][%lu] Not supported type of log record %u.", \ + "%lu %lu %u"), (u_long)((lsn).file), (u_long)((lsn).offset),\ + (ltype)); \ + (lvh)->unknown_logrec_cnt++; \ + goto err; \ +} while (0) + +#define SKIP_FORWARD_CHK(type) ((type) != DB___txn_regop && \ + (type) != DB___txn_ckp && (type) != DB___fop_rename && \ + (type) != DB___txn_child) + +#define NOTCOMMIT(type) ((type) != DB___txn_regop && \ + (type) != DB___txn_child) + +#define LOG_VRFY_PROC(lvh, lsn, argp, fileid) do { \ + int __lv_log_vrfy_proc_step = 0; \ + if ((ret = __log_vrfy_proc((lvh), (lsn), (argp)->prev_lsn, \ + (argp)->type, (argp)->txnp, (fileid), \ + &__lv_log_vrfy_proc_step)) != 0) \ + goto err; \ + if (__lv_log_vrfy_proc_step == 1) \ + goto out; \ + else if (__lv_log_vrfy_proc_step == -1) \ + goto err; \ + else \ + DB_ASSERT(lvh->dbenv->env, \ + __lv_log_vrfy_proc_step == 0); \ +} while (0) + +/* Log record handlers used by log types involving page updates. 
*/ +#define ON_PAGE_UPDATE(lvh, lsn, argp, pgno) do { \ + int __lv_onpgupdate_res; \ + if ((ret = __lv_on_page_update((lvh), (lsn), (argp)->fileid, \ + (pgno), (argp)->txnp, &__lv_onpgupdate_res)) != 0) \ + goto err; \ + if (__lv_onpgupdate_res == 1) \ + goto out; \ + else if (__lv_onpgupdate_res == -1) \ + goto err; \ + else \ + DB_ASSERT(lvh->dbenv->env, __lv_onpgupdate_res == 0); \ +} while (0) + +static int +__lv_on_page_update(lvh, lsn, fileid, pgno, txnp, step) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; + db_pgno_t pgno; + DB_TXN *txnp; + int *step; +{ + u_int32_t otxn, txnid; + int res, ret; + + txnid = txnp->txnid; + res = ret = 0; + + if ((ret = __add_page_to_txn(lvh, fileid, pgno, + txnid, &otxn, &res)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + if (res != -1) {/* No access violation, we are done. */ + *step = 0; + goto out; + } + /* + * It's OK for a child txn to update its parent's page, but not OK + * for a parent txn to update its active child's pages. We can't + * detect the child's abort, so we may false alarm that a parent txn + * is updating its child's pages. + */ + if ((ret = __is_ancestor_txn(lvh, otxn, txnid, lsn, &res)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + if (res) {/* The txnid is updating its parent otxn's pages. */ + *step = 0; + goto out; + } + if ((ret = __is_ancestor_txn(lvh, txnid, otxn, lsn, &res)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + if (res) {/* The txnid is updating its active child otxn's pages. */ + __db_errx(lvh->dbenv->env, DB_STR_A("2537", + "[%lu][%lu] [WARNING] Parent txn %lx is updating its " + "active child txn %lx's pages, or %lx aborted.", + "%lu %lu %lx %lx %lx"), (u_long)lsn.file, + (u_long)lsn.offset, (u_long)txnid, + (u_long)otxn, (u_long)otxn); + *step = 0; + goto out; + } + /* + * It's likely that the two txns are parent-child and the child + * aborted, but from the log we can't figure out this fact. 
+ */ + __db_errx(lvh->dbenv->env, DB_STR_A("2538", + "[%lu][%lu] [WARNING] Txn %lx is updating txn %lx's pages.", + "%lu %lu %lx %lx"), (u_long)lsn.file, (u_long)lsn.offset, + (u_long)txnid, (u_long)otxn); + *step = 0; +out: +err: + return (ret); +} + +/* + * This macro is put in all types of verify functions where a db file is + * updated, but no page number/lock involved. + */ +#define ON_PAGE_UPDATE4 + +/* + * General log record handler used by all log verify functions. + */ +static int +__log_vrfy_proc(lvh, lsn, prev_lsn, type, txnp, fileid, step) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn, prev_lsn; + u_int32_t type; /* Log record type. */ + DB_TXN *txnp; + int32_t fileid; + int *step; +{ + int dovrfy, ret; + + dovrfy = 1; + ret = 0; + /* + * step is used to tell if go on with the rest of the caller, or + * goto err/out. + * 0: go on after this function; 1: goto out; -1: goto err. + */ + *step = 0; + + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + /* Commits are not abort/beginnings. */ + if (NOTCOMMIT(type) && ((ret = __lv_log_fwdscr_onrec( + lvh, txnp->txnid, type, prev_lsn, lsn)) != 0)) + goto err; + if (SKIP_FORWARD_CHK(type)) + goto out; + } else {/* Verifying */ + if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE)) + __db_errx(lvh->dbenv->env, DB_STR_A("2539", + "[%lu][%lu] Verifying log record of type %s", + "%lu %lu %s"), (u_long)lsn.file, + (u_long)lsn.offset, LOGTYPE_NAME(lvh, type)); + /* + * If verifying a log range and we've passed the initial part + * which may have partial txns, remove the PARTIAL bit. + */ + if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) && + LOG_COMPARE(&lsn, &(lvh->valid_lsn)) >= 0) { + lvh->valid_lsn.offset = lvh->valid_lsn.file = 0; + F_CLR(lvh, DB_LOG_VERIFY_PARTIAL); + } + + if ((ret = __lv_new_logfile_vrfy(lvh, &lsn)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + /* If only verify a db file, ignore logs about other dbs. 
*/ + if (F_ISSET(lvh, DB_LOG_VERIFY_DBFILE) && fileid != + INVAL_DBREGID && (ret = __lv_vrfy_for_dbfile(lvh, + fileid, &dovrfy)) != 0) + goto err; + if (!dovrfy) + goto out; + if (lvh->aborted_txnid != 0 && + ((ret = __lv_on_txn_aborted(lvh)) != 0)) + goto err; + if ((ret = __get_aborttxn(lvh, lsn)) != 0) + goto err; + if (txnp->txnid >= TXN_MINIMUM) { + if ((ret = __lv_on_txn_logrec(lvh, &lsn, &(prev_lsn), + txnp, type, fileid)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } else {/* Non-txnal updates. */ + if ((ret = __lv_on_nontxn_update(lvh, &lsn, + txnp->txnid, type, fileid)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + } + if (0) { +out: + *step = 1; + } + if (0) { +err: + *step = -1; + } + return (ret); +} + +/* Log record handlers used by log types for each access method. */ +static int +__lv_on_bam_log(lvh, lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_BTREE && dbtype != DB_RECNO && dbtype != DB_HASH) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_BTREE); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +static int +__lv_on_ham_log(lvh, lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_HASH) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_HASH); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +static int +__lv_on_heap_log(lvh, lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_HEAP) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_HEAP); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +static int +__lv_on_qam_log(lvh, 
lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_QUEUE) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_QUEUE); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +/* Catch commits and store into lvinfo->txnrngs database. */ +static int +__lv_log_fwdscr_oncmt(lvinfo, lsn, txnid, ptxnid, timestamp) + DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + u_int32_t txnid, ptxnid; + int32_t timestamp; +{ + int ret; + struct __lv_txnrange tr; + DBT key, data; + + memset(&tr, 0, sizeof(tr)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + tr.txnid = txnid; + tr.end = lsn; + tr.when_commit = timestamp; + tr.ptxnid = ptxnid; + key.data = &(txnid); + key.size = sizeof(txnid); + data.data = &tr; + data.size = sizeof(tr); + if ((ret = __db_put(lvinfo->txnrngs, lvinfo->ip, NULL, + &key, &data, 0)) != 0) + goto err; +err: + return (ret); +} + +/* Catch aborts and txn beginnings and store into lvinfo->txnrngs database. */ +static int +__lv_log_fwdscr_onrec(lvinfo, txnid, lrtype, prevlsn, lsn) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid, lrtype; + DB_LSN prevlsn, lsn; +{ + int doput, ret, ret2, tret; + u_int32_t putflag; + struct __lv_txnrange tr, *ptr; + DBC *csr; + DBT key, key2, data, data2; + + /* Ignore non-txnal log records. */ + if (txnid < TXN_MINIMUM) + return (0); + + /* Not used for now, but may be used later. Pass lint checks. */ + COMPQUIET(lrtype ,0); + putflag = 0; + doput = ret = ret2 = 0; + csr = NULL; + memset(&tr, 0, sizeof(tr)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&key2, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + tr.txnid = txnid; + tr.when_commit = 0;/* This is not a __txn_regop record. 
*/ + + if ((ret = __db_cursor(lvinfo->txnrngs, lvinfo->ip, + NULL, &csr, 0)) != 0) + goto err; + /* + * If the txnid is first seen here or reused later, it's aborted + * after this log record; if this log record is the 1st one of a txn, + * we have the beginning of the txn; otherwise the log record is one + * of the actions taken within the txn, and we don't do anything. + */ + if ((ret = __dbc_get(csr, &key, &data, DB_SET)) != 0 && + ret != DB_NOTFOUND) + goto err; + + ptr = (struct __lv_txnrange *)data.data; + if (ret == DB_NOTFOUND || !IS_ZERO_LSN(ptr->begin)) { + tr.end = lsn; + data.data = &tr; + data.size = sizeof(tr); + doput = 1; + key2.data = &lsn; + key2.size = sizeof(lsn); + data2.data = &(tr.txnid); + data2.size = sizeof(tr.txnid); + putflag = DB_KEYFIRST; + if ((ret2 = __db_put(lvinfo->txnaborts, lvinfo->ip, NULL, + &key2, &data2, 0)) != 0) { + ret = ret2; + goto err; + } + } else if (ret == 0 && IS_ZERO_LSN(prevlsn)) {/* The beginning of txn.*/ + /* The begin field must be [0, 0]. */ + DB_ASSERT(lvinfo->dbenv->env, IS_ZERO_LSN(ptr->begin)); + ptr->begin = lsn; + putflag = DB_CURRENT; + doput = 1; + } + + if (doput && (ret = __dbc_put(csr, &key, &data, putflag)) != 0) + goto err; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + + return (ret); +} + +/* + * Return 0 from dovrfy if verifying logs for a specified db file, and fileid + * is not the one we want; Otherwise return 1 from dovrfy. If DB operations + * failed, the error is returned. 
+ */ +static int +__lv_vrfy_for_dbfile(lvh, fileid, dovrfy) + DB_LOG_VRFY_INFO *lvh; + int32_t fileid; + int *dovrfy; +{ + u_int8_t tmpuid[DB_FILE_ID_LEN]; + VRFY_FILEREG_INFO *fregp; + u_int32_t i; + int ret, tret; + DBT tgtkey; + + ret = tret = 0; + *dovrfy = 0; + fregp = NULL; + memset(tmpuid, 0, sizeof(u_int8_t) * DB_FILE_ID_LEN); + memset(&tgtkey, 0, sizeof(tgtkey)); + tgtkey.data = lvh->target_dbid; + tgtkey.size = DB_FILE_ID_LEN; + ret = __get_filereg_info(lvh, &tgtkey, &fregp); + + /* + * If the target db file is not seen yet, we don't verify any file, + * and it does not mean anything wrong. + */ + if (ret == DB_NOTFOUND) { + ret = 0; + goto out; + } + if (ret != 0) + goto err; + + for (i = 0; i < fregp->regcnt; i++) + if (fregp->dbregids[i] == fileid) { + *dovrfy = 1; + goto out; + } +out: +err: + if (fregp != NULL && + (tret = __free_filereg_info(fregp)) != 0 && ret == 0) + ret = tret; + + return (ret); +} + +static int +__lv_log_mismatch(lvh, lsn, dbtype, exp_dbtype) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + DBTYPE dbtype, exp_dbtype; +{ + int ret; + + __db_errx(lvh->dbenv->env, DB_STR_A("2540", + "[%lu][%lu] Log record type does not match related database type, " + "current database type: %s, expected database type according to " + "the log record type: %s.", "%lu %lu %s %s"), + (u_long)lsn.file, (u_long)lsn.offset, __lv_dbtype_str(dbtype), + __lv_dbtype_str(exp_dbtype)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); +err: + return (ret); +} + +static int +__lv_dbregid_to_dbtype(lvh, id, ptype) + DB_LOG_VRFY_INFO *lvh; + int32_t id; + DBTYPE *ptype; +{ + int ret; + VRFY_FILELIFE *pflife; + + ret = 0; + pflife = NULL; + + if ((ret = __get_filelife(lvh, id, &pflife)) != 0) + goto err; + *ptype = pflife->dbtype; +err: + if (pflife != NULL) + __os_free(lvh->dbenv->env, pflife); + + return (ret); +} + +/* + * __db_log_verify_global_report -- + * Report statistics data in DB_LOG_VRFY_INFO handle. 
+ * + * PUBLIC: void __db_log_verify_global_report __P((const DB_LOG_VRFY_INFO *)); + */ +void __db_log_verify_global_report (lvinfo) + const DB_LOG_VRFY_INFO *lvinfo; +{ + u_int32_t i, nltype; + + __db_msg(lvinfo->dbenv->env, + "Number of active transactions: %u;", lvinfo->ntxn_active); + __db_msg(lvinfo->dbenv->env, + "Number of committed transactions: %u;", lvinfo->ntxn_commit); + __db_msg(lvinfo->dbenv->env, + "Number of aborted transactions: %u;", lvinfo->ntxn_abort); + __db_msg(lvinfo->dbenv->env, + "Number of prepared transactions: %u;", lvinfo->ntxn_prep); + __db_msg(lvinfo->dbenv->env, + "Total number of checkpoint: %u;", lvinfo->nckp); + __db_msg(lvinfo->dbenv->env, + "Total number of non-transactional updates: %u;", + lvinfo->non_txnup_cnt); + __db_msg(lvinfo->dbenv->env, + "Total number of unknown log records: %u;", + lvinfo->unknown_logrec_cnt); + __db_msg(lvinfo->dbenv->env, + "Total number of app-specific log record: %u;", + lvinfo->external_logrec_cnt); + __db_msg(lvinfo->dbenv->env, + "The number of each type of log record:"); + + for (i = 0; i < 256; i++) { + nltype = lvinfo->lrtypes[i]; + if (LOGTYPE_NAME(lvinfo, i) != NULL) + __db_msg(lvinfo->dbenv->env, "\n\t%s : %u;", + LOGTYPE_NAME(lvinfo, i), nltype); + } +} + +/* + * PUBLIC: int __crdel_metasub_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__crdel_metasub_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_metasub_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __crdel_metasub_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __crdel_inmem_create_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, 
db_recops, void *)); + */ +int +__crdel_inmem_create_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_inmem_create_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __crdel_inmem_create_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __crdel_inmem_rename_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_rename_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_inmem_rename_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __crdel_inmem_rename_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __crdel_inmem_remove_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_remove_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_inmem_remove_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __crdel_inmem_remove_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_addrem_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_addrem_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_addrem_args *argp; + 
DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_big_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_big_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_big_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_big_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_ovref_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_ovref_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_ovref_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_ovref_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_relink_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_relink_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_relink_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if 
((ret = + __db_relink_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_debug_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_debug_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_debug_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __db_debug_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_noop_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_noop_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_noop_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_noop_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_alloc_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_alloc_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_alloc_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_alloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + 
__os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_alloc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_alloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_free_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_free_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_free_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_free_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_free_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_free_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_free_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_free_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int 
__db_cksum_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_cksum_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_cksum_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_freedata_42_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__db_pg_freedata_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_freedata_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_freedata_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_freedata_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_freedata_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_freedata_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_freedata_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_init_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_init_verify(env, 
dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_init_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_init_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_sort_44_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_sort_44_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_sort_44_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_sort_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_trunc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_trunc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_trunc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_trunc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. 
*/ +out: +err: + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_realloc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_realloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_realloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_realloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_relink_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_relink_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_relink_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_relink_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_merge_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_merge_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_merge_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_merge_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pgno_verify 
__P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pgno_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pgno_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pgno_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +static const char * +__lv_dbreg_str(op) + u_int32_t op; +{ + const char *p; + + switch (op) { + case DBREG_CHKPNT: + p = "DBREG_CHKPNT"; + break; + case DBREG_RCLOSE: + p = "DBREG_RCLOSE"; + break; + case DBREG_CLOSE: + p = "DBREG_CLOSE"; + break; + case DBREG_OPEN: + p = "DBREG_OPEN"; + break; + case DBREG_PREOPEN: + p = "DBREG_PREOPEN"; + break; + case DBREG_REOPEN: + p = "DBREG_REOPEN"; + break; + case DBREG_XCHKPNT: + p = "DBREG_XCHKPNT"; + break; + case DBREG_XOPEN: + p = "DBREG_XOPEN"; + break; + case DBREG_XREOPEN: + p = "DBREG_XREOPEN"; + break; + default: + p = DB_STR_P("Unknown dbreg op code"); + break; + } + + return (p); +} + +static int +__lv_dbt_str(dbt, str) + const DBT *dbt; + char **str; +{ + char *p, *q; + u_int32_t buflen, bufsz, i; + int ret; + + ret = 0; + p = q = NULL; + buflen = bufsz = i = 0; + bufsz = sizeof(char) * dbt->size * 2; + + if ((ret = __os_malloc(NULL, bufsz, &p)) != 0) + goto err; + q = (char *)dbt->data; + + memset(p, 0, bufsz); + /* + * Each unprintable character takes up several bytes, so be ware of + * memory access violation. + */ + for (i = 0; i < dbt->size && buflen < bufsz; i++) { + buflen = (u_int32_t)strlen(p); + snprintf(p + buflen, bufsz - (buflen + 1), + isprint(q[i]) || q[i] == 0x0a ? 
"%c" : "%x", q[i]); + } + *str = p; +err: + return (ret); +} + +static const char * +__lv_dbtype_str(dbtype) + DBTYPE dbtype; +{ + char *p; + + switch (dbtype) { + case DB_BTREE: + p = "DB_BTREE"; + break; + case DB_HASH: + p = "DB_HASH"; + break; + case DB_RECNO: + p = "DB_RECNO"; + break; + case DB_QUEUE: + p = "DB_QUEUE"; + break; + default: + p = DB_STR_P("Unknown db type"); + break; + } + + return (p); +} + +/* + * PUBLIC: int __dbreg_register_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__dbreg_register_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __dbreg_register_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_FILEREG_INFO *fregp, freg; + VRFY_FILELIFE *pflife, flife; + int checklife, rmv_dblife, ret, ret2; + u_int32_t opcode; + char *puid; + const char *dbfname; + + dbfname = NULL; + checklife = 1; + opcode = 0; + ret = ret2 = rmv_dblife = 0; + puid = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + fregp = NULL; + pflife = NULL; + memset(&flife, 0, sizeof(flife)); + memset(&freg, 0, sizeof(freg)); + + if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0) + return (ret); + + opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK); + dbfname = argp->name.size == 0 ? "(null)" : (char *)(argp->name.data); + /* + * We don't call LOG_VRFY_PROC macro here, so we have to copy the code + * snippet in __log_vrfy_proc here. 
+ */ + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + if ((ret = __lv_log_fwdscr_onrec(lvh, argp->txnp->txnid, + argp->type, argp->prev_lsn, *lsnp)) != 0) + goto err; + goto out; + } + if (lvh->aborted_txnid != 0 && (ret = __lv_on_txn_aborted(lvh)) != 0) + goto err; + + if ((ret = __get_filereg_info(lvh, &(argp->uid), &fregp)) != 0 && + ret != DB_NOTFOUND) + goto err; + + /* + * When DBREG_CLOSE, we should remove the fileuid-filename mapping + * from filereg because the file can be opened again with a different + * fileuid after closed. + */ + if (ret == 0 && IS_DBREG_CLOSE(opcode)) { + if ((ret = __db_del(lvh->fileregs, lvh->ip, NULL, + &(argp->uid), 0)) != 0) + goto err; + } + + /* + * If this db file is seen for the 1st time, store filereg and + * filelife info. Since we will do a end-to-begin scan before the + * verification, we will be able to get the record but it's regcnt + * is 0 since we didn't know any dbregid yet. + */ + if (ret == DB_NOTFOUND || fregp->regcnt == 0) { + /* Store filereg info unless it's a CLOSE. */ + freg.fileid = argp->uid; + if (!IS_DBREG_CLOSE(opcode)) { + freg.regcnt = 1; + freg.dbregids = &(argp->fileid); + } else { + freg.regcnt = 0; + freg.dbregids = NULL; + } + if (ret == DB_NOTFOUND) { + /* + * If the db file is an in-memory db file, we can arrive + * here because there is no __fop_rename log for it; + * if the __fop_rename log record is out of the log range we + * verify, we will also arrive here. + */ + if ((ret = __os_malloc(env, argp->name.size + 1, + &(freg.fname))) != 0) + goto err; + memset(freg.fname, 0, + sizeof(char) * (argp->name.size + 1)); + (void)strncpy(freg.fname, + (const char *)(argp->name.data), argp->name.size); + } else /* We already have the name. 
*/ + if ((ret = __os_strdup(env, + fregp->fname, &(freg.fname))) != 0) + goto err; + + if (!IS_DBREG_OPEN(opcode) && + !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + /* It's likely that the DBREG_OPEN is not seen.*/ + __db_msg(env, DB_STR_A("2541", + "[%lu][%lu] Suspicious dbreg operation: %s, the " + "database file %s's register in log region does " + "not begin with an open operation.", + "%lu %lu %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, + __lv_dbreg_str(opcode), dbfname); + } + + /* + * PREOPEN is only generated when opening an in-memory db. + * Because we need to log the fileid we're allocating, but we + * don't have all the details yet, we are preopening the + * database and will actually complete the open later. So + * PREOPEN is not a real open, and the log should be ignored + * in log_verify. + * If fileuid is in a CLOSE operation there is no need to + * record it. + */ + if ((opcode != DBREG_PREOPEN) && !IS_DBREG_CLOSE(opcode) && + (ret = __put_filereg_info(lvh, &freg)) != 0) + goto err; + + /* Store filelife info unless it's a CLOSE dbreg operation. */ + if (!IS_DBREG_CLOSE(opcode)) { + flife.lifetime = opcode; + flife.dbregid = argp->fileid; + flife.lsn = *lsnp; + flife.dbtype = argp->ftype; + flife.meta_pgno = argp->meta_pgno; + memcpy(flife.fileid, argp->uid.data, argp->uid.size); + if ((ret = __put_filelife(lvh, &flife)) != 0) + goto err; + } + /* on_txn_logrec relies on the freg info in db first. */ + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + goto out; + } + + /* + * Add dbregid if it's new, and store the file register info; or + * remove dbregid from fregp if we are closing the file. + */ + if ((ret = __add_dbregid(lvh, fregp, argp->fileid, + opcode, *lsnp, argp->ftype, argp->meta_pgno, &ret2)) != 0) + goto err; + ret = ret2; + if (ret != 0 && ret != 1 && ret != 2 && ret != -1) + goto err;/* DB operation error. */ + if (ret != 0) { + /* Newly seen dbregid does not need to check life. 
*/ + if (ret == 1) + checklife = 0; + else if (ret == -1) + rmv_dblife = 1;/* The dbreg file id is closed. */ + else if (ret == 2) { + __db_errx(env, DB_STR_A("2542", + "[%lu][%lu] Wrong dbreg operation " + "sequence, opening %s for id %d which is already " + "open.", "%lu %lu %s %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + dbfname, argp->fileid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + if (!rmv_dblife && (ret = __put_filereg_info(lvh, fregp)) != 0) + goto err; + } + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + if (!checklife) + goto out; + + /* + * Verify the database type does not change, and the lifetime of a + * db file follow an open/chkpnt->[chkpnt]->close order. + * A VRFY_FILELIFE record is removed from db on DBREG_CLOSE, + * and inserted into db on DBREG_OPEN. + */ + if (!IS_DBREG_OPEN(opcode) && + (ret = __get_filelife(lvh, argp->fileid, &pflife)) != 0) { + if (ret == DB_NOTFOUND) { + if (!F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + __db_errx(env, DB_STR_A("2543", + "[%lu][%lu] Wrong dbreg operation sequence," + "file %s with id %d is first seen of " + "status: %s", "%lu %lu %s %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + dbfname, argp->fileid, + __lv_dbreg_str(opcode)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } else + ret = 0; + } + goto err; + } + + /* Can't go on verifying without pflife. 
*/ + if (pflife == NULL) + goto out; + if (argp->ftype != pflife->dbtype) { + if ((ret = __lv_dbt_str(&(argp->uid), &puid)) != 0) + goto err; + __db_errx(env, DB_STR_A("2544", + "[%lu][%lu] The dbtype of database file %s with uid %s " + " and id %d has changed from %s to %s.", + "%lu %lu %s %s %d %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, dbfname, puid, + pflife->dbregid, __lv_dbtype_str(pflife->dbtype), + __lv_dbtype_str(argp->ftype)); + + __os_free(env, puid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + if ((IS_DBREG_CLOSE(opcode) && + (pflife->lifetime != DBREG_CHKPNT || + pflife->lifetime != DBREG_XCHKPNT) && + !IS_DBREG_OPEN(pflife->lifetime))) { + __db_errx(env, DB_STR_A("2545", + "[%lu][%lu] Wrong dbreg operation sequence for file %s " + "with id %d, current status: %s, new status: %s", + "%lu %lu %s %d %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, dbfname, pflife->dbregid, + __lv_dbreg_str(pflife->lifetime), + __lv_dbreg_str(opcode)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + pflife->lifetime = opcode; + pflife->lsn = *lsnp; + if ((!rmv_dblife && (ret = __put_filelife(lvh, pflife)) != 0) || + ((rmv_dblife || IS_DBREG_CLOSE(opcode)) && + ((ret = __del_filelife(lvh, argp->fileid)) != 0))) + goto err; + +out: + /* There may be something to do here in future. 
*/ +err: + __os_free(env, argp); + if (fregp != NULL && + (ret2 = __free_filereg_info(fregp)) != 0 && ret == 0) + ret = ret2; + if (freg.fname != NULL) + __os_free(env, freg.fname); + if (pflife != NULL) + __os_free(env, pflife); + + return (ret); +} + +/* + * PUBLIC: int __bam_split_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_split_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_split_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->left); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->right); + /* Parent page lock is always released before __bam_page returns. */ + + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_split_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_split_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_split_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_rsplit_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_rsplit_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_rsplit_args *argp; + 
DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_rsplit_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_adj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_adj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_adj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_adj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_irep_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_irep_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_irep_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_irep_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_cadjust_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_cadjust_verify(env, dbtp, lsnp, notused2, lvhp) 
+ ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_cadjust_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_cadjust_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_cdel_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_cdel_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_cdel_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_cdel_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_repl_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_repl_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_repl_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_repl_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_root_verify __P((ENV *, 
DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_root_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ /* Read the record, apply LOG_VRFY_PROC, call __lv_on_bam_log, free argp. */ + __bam_root_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_root_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __bam_curadj_verify -- + * Read dbtp->data with __bam_curadj_read, apply LOG_VRFY_PROC, then + * call __lv_on_bam_log; argp is freed at err. + * + * PUBLIC: int __bam_curadj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_curadj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_curadj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_curadj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __bam_rcuradj_verify -- + * Read dbtp->data with __bam_rcuradj_read, apply LOG_VRFY_PROC, then + * call __lv_on_bam_log; argp is freed at err. + * + * PUBLIC: int __bam_rcuradj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_rcuradj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_rcuradj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_rcuradj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __bam_relink_43_verify __P((ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *)); + */ +int +__bam_relink_43_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ /* Read the record and pass argp->type to ON_NOT_SUPPORTED; free argp at err. */ + __bam_relink_43_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_relink_43_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __bam_merge_44_verify -- + * Read dbtp->data with __bam_merge_44_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __bam_merge_44_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_merge_44_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_merge_44_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_merge_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __fop_create_42_verify -- + * Read dbtp->data with __fop_create_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __fop_create_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_create_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_create_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __fop_create_verify -- + * Read dbtp->data with __fop_create_read and apply LOG_VRFY_PROC with + * INVAL_DBREGID; argp is freed at err. + * + * PUBLIC: int __fop_create_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_verify(env, dbtp, lsnp,
notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_create_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_create_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __fop_remove_verify -- + * Read dbtp->data with __fop_remove_read and apply LOG_VRFY_PROC with + * INVAL_DBREGID; argp is freed at err. + * + * PUBLIC: int __fop_remove_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_remove_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_remove_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_remove_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __fop_write_42_verify -- + * Read dbtp->data with __fop_write_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __fop_write_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_write_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_write_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __fop_write_verify -- + * Read dbtp->data with __fop_write_read, then apply LOG_VRFY_PROC with + * INVAL_DBREGID and ON_PAGE_UPDATE4; argp is freed at err. + * + * PUBLIC: int __fop_write_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_write_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; +
+ if ((ret = __fop_write_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __fop_rename_42_verify -- + * Read dbtp->data with __fop_rename_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __fop_rename_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_rename_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_rename_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __fop_rename_verify -- + * Read dbtp->data with __fop_rename_read and apply LOG_VRFY_PROC with + * INVAL_DBREGID. When DB_LOG_VERIFY_FORWARD is set and + * __get_filereg_info does not return 0 for argp->fileid, a + * dirname/newname path is stored for that fileid via + * __put_filereg_info. buf and argp are freed at err. + * + * PUBLIC: int __fop_rename_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_rename_args *argp; + DB_LOG_VRFY_INFO *lvh; + char *buf; + int ret; + size_t buflen; + VRFY_FILEREG_INFO freg, *fregp; + + memset(&freg, 0, sizeof(freg)); + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + buf = NULL; /* Lets the err label tell whether a path buffer was allocated. */ + + if ((ret = __fop_rename_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + /* + * Since we get the fname-fuid map when iterating from end to + * beginning, we only store the latest file name, that's the + * name supposed to be used currently. So if the fileid is + * already stored, and we see it again here, it means the db + * file was renamed and we already have its latest name. + * + * Store the dbfile path (dir/fname) in case there are db + * files with same name in different data directories.
+ */ + if (__get_filereg_info(lvh, &(argp->fileid), &fregp) == 0) { + if (fregp != NULL && + (ret = __free_filereg_info(fregp)) != 0) + goto err; + goto out; /* Already stored: keep the name we have. */ + } + freg.fileid = argp->fileid; + if ((ret = __os_malloc(env, buflen = argp->dirname.size + + argp->newname.size + 2, &buf)) != 0) + goto err; + snprintf(buf, buflen, "%s/%s", (char *)argp->dirname.data, + (char *)argp->newname.data); + freg.fname = buf; /* freg only borrows buf; buf is freed at err below. */ + /* Store the dbfilename<-->dbfileid map. */ + if ((ret = __put_filereg_info(lvh, &freg)) != 0) + goto err; + } +out: + +err: + if (buf != NULL) + __os_free(lvh->dbenv->env, buf); + __os_free(env, argp); + + return (ret); +} + +/* + * __fop_file_remove_verify -- + * Read dbtp->data with __fop_file_remove_read and apply LOG_VRFY_PROC + * with INVAL_DBREGID; argp is freed at err. + * + * PUBLIC: int __fop_file_remove_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_file_remove_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_file_remove_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_file_remove_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +#ifdef HAVE_HASH +/* + * __ham_insdel_verify -- + * Read dbtp->data with __ham_insdel_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_insdel_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_insdel_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_insdel_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_insdel_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int
__ham_newpage_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_newpage_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ /* Read the record, apply LOG_VRFY_PROC and ON_PAGE_UPDATE4, call __lv_on_ham_log, free argp. */ + __ham_newpage_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_newpage_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __ham_splitdata_verify -- + * Read dbtp->data with __ham_splitdata_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_splitdata_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_splitdata_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_splitdata_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_splitdata_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __ham_replace_verify -- + * Read dbtp->data with __ham_replace_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_replace_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_replace_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_replace_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_replace_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if
((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __ham_copypage_verify -- + * Read dbtp->data with __ham_copypage_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_copypage_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_copypage_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_copypage_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_copypage_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __ham_metagroup_42_verify -- + * Read dbtp->data with __ham_metagroup_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __ham_metagroup_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_metagroup_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_metagroup_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_metagroup_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __ham_metagroup_verify -- + * Read dbtp->data with __ham_metagroup_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_metagroup_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_metagroup_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_metagroup_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_metagroup_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + +
LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __ham_groupalloc_42_verify -- + * Read dbtp->data with __ham_groupalloc_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __ham_groupalloc_42_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_groupalloc_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_groupalloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __ham_groupalloc_verify -- + * Read dbtp->data with __ham_groupalloc_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE4, then fetch the file's VRFY_FILELIFE with + * __get_filelife and report DB_LOG_VERIFY_BAD when its meta_pgno is not + * PGNO_BASE_MD. pflife and argp are freed at err. + * + * PUBLIC: int __ham_groupalloc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_groupalloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_groupalloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_FILELIFE *pflife; + int ret; + + ret = 0; + pflife = NULL; /* Lets the err label tell whether __get_filelife produced one. */ + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_groupalloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + + /* + * The __ham_groupalloc record is only generated when creating the + * hash sub database so it will always be on the master database's + * fileid.
+ */ + + if ((ret = __get_filelife(lvh, argp->fileid, &pflife)) != 0) + goto err; + + if (pflife->meta_pgno != PGNO_BASE_MD) { + __db_errx(lvh->dbenv->env, DB_STR_A("2546", + "[%lu][%lu] __ham_groupalloc should apply only to the " + "master database with meta page number 0, current meta " + "page number is %d.", "%lu %lu %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + pflife->meta_pgno); + ret = DB_LOG_VERIFY_BAD; /* Set before ON_ERROR so the failure code is in ret. */ + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + +out: + +err: + if (pflife != NULL) + __os_free(lvh->dbenv->env, pflife); + + __os_free(env, argp); + return (ret); +} + +/* + * __ham_changeslot_verify -- + * Read dbtp->data with __ham_changeslot_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE4, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_changeslot_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_changeslot_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_changeslot_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_changeslot_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns.
*/ + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __ham_contract_verify -- + * Read dbtp->data with __ham_contract_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_contract_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_contract_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_contract_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_contract_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __ham_curadj_verify -- + * Read dbtp->data with __ham_curadj_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_curadj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_curadj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_curadj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_curadj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __ham_chgpg_verify -- + * Read dbtp->data with __ham_chgpg_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE4, then call __lv_on_ham_log; argp is freed at err. + * + * PUBLIC: int __ham_chgpg_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_chgpg_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_chgpg_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_chgpg_read(env, NULL, NULL,
dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} +#endif + +#ifdef HAVE_HEAP +/* + * __heap_addrem_verify -- + * Read dbtp->data with __heap_addrem_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_heap_log; argp is freed at err. + * + * PUBLIC: int __heap_addrem_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_addrem_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __heap_addrem_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + __os_free(env, argp); + return (ret); +} + +/* + * __heap_pg_alloc_verify -- + * Read dbtp->data with __heap_pg_alloc_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_heap_log; argp is freed at err. + * + * PUBLIC: int __heap_pg_alloc_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __heap_pg_alloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + __os_free(env, argp); + return (ret); +} + +/* + * __heap_trunc_meta_verify -- + * Read dbtp->data with __heap_trunc_meta_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_heap_log; argp is freed at err. + * + * PUBLIC: int __heap_trunc_meta_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_meta_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ +
__heap_trunc_meta_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_trunc_meta_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * __heap_trunc_page_verify -- + * Read dbtp->data with __heap_trunc_page_read, apply LOG_VRFY_PROC and + * ON_PAGE_UPDATE, then call __lv_on_heap_log; argp is freed at err. + * + * PUBLIC: int __heap_trunc_page_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_page_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __heap_trunc_page_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_trunc_page_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + __os_free(env, argp); + return (ret); +} +#endif + +#ifdef HAVE_QUEUE +/* + * __qam_incfirst_verify -- + * Read dbtp->data with __qam_incfirst_read, apply LOG_VRFY_PROC, then + * call __lv_on_qam_log; argp is freed at err. + * + * PUBLIC: int __qam_incfirst_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_incfirst_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_incfirst_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_incfirst_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __qam_mvptr_verify -- + * Read dbtp->data with __qam_mvptr_read, apply LOG_VRFY_PROC, then + * call __lv_on_qam_log; argp is freed at err. + * + * PUBLIC: int __qam_mvptr_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int
+__qam_mvptr_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_mvptr_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_mvptr_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __qam_del_verify -- + * Read dbtp->data with __qam_del_read, apply LOG_VRFY_PROC, then call + * __lv_on_qam_log; argp is freed at err. + * + * PUBLIC: int __qam_del_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_del_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_del_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_del_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __qam_add_verify -- + * Read dbtp->data with __qam_add_read, apply LOG_VRFY_PROC, then call + * __lv_on_qam_log; argp is freed at err. + * + * PUBLIC: int __qam_add_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_add_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_add_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_add_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __qam_delext_verify -- + * Read dbtp->data with __qam_delext_read, apply LOG_VRFY_PROC with + * INVAL_DBREGID, then call __lv_on_qam_log with argp->fileid; argp is + * freed at err. + * NOTE(review): the other __qam handlers here pass argp->fileid to + * LOG_VRFY_PROC -- confirm INVAL_DBREGID is intended. + * + * PUBLIC: int __qam_delext_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_delext_verify(env, dbtp, lsnp,
notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_delext_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_delext_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} +#endif + +/* + * __txn_regop_42_verify -- + * Read dbtp->data with __txn_regop_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __txn_regop_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_regop_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_regop_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_regop_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __txn_regop_verify -- + * Verify a __txn_regop record read with __txn_regop_read. With + * DB_LOG_VERIFY_FORWARD set it only calls __lv_log_fwdscr_oncmt and + * stores a timestamp entry. Otherwise it applies LOG_VRFY_PROC, drops + * the txn's pages with __del_txn_pages, checks the timestamp, and marks + * the txn's VRFY_TXN_INFO as TXN_STAT_COMMIT while adjusting the lvh + * txn counters. ptvi, pptvi and argp are released at err. + * + * PUBLIC: int __txn_regop_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_regop_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_regop_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret, ret2, started; + VRFY_TXN_INFO *ptvi, *pptvi; + VRFY_TIMESTAMP_INFO tsinfo; + + ptvi = pptvi = NULL; /* pptvi is never assigned again in this function. */ + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + ret = ret2 = started = 0; + + if ((ret = __txn_regop_read(env, dbtp->data, &argp)) != 0) + return (ret); + + /* + * The __lv_log_fwdscr_oncmt call must precede LOG_VRFY_PROC otherwise + * this txn will be taken as an aborted txn.
+ */ + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + if ((ret = __lv_log_fwdscr_oncmt(lvh, *lsnp, + argp->txnp->txnid, 0, argp->timestamp)) != 0) + goto err; + + tsinfo.lsn = *lsnp; + tsinfo.timestamp = argp->timestamp; + tsinfo.logtype = argp->type; + if ((ret = __put_timestamp_info(lvh, &tsinfo)) != 0) + goto err; + goto out; /* We are done. */ + } + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if ((ret = __del_txn_pages(lvh, argp->txnp->txnid)) != 0 && + ret != DB_NOTFOUND) + goto err;/* Some txns may have updated no pages. */ + if ((ret = __lv_on_timestamp(lvh, lsnp, argp->timestamp, + DB___txn_regop)) != 0) + goto err; + if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + (ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->txnp->txnid, &started)) == 0 && started != 0) { + ret = 0; /* __txn_started reported started != 0: not an error. */ + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2547", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; /* With DB_LOG_VERIFY_PARTIAL a missing txn entry is not reported. */ + goto out; + + } + DB_ASSERT(env, ptvi->ptxnid == 0); + + /* + * This log record is only logged when committing a outermost txn, + * child txn commits are logged in __txn_child_log. + */ + if (ptvi->ptxnid == 0) { + if (ptvi->status == TXN_STAT_PREPARE) + lvh->ntxn_prep--; + else if (ptvi->status == TXN_STAT_ACTIVE) + lvh->ntxn_active--; + lvh->ntxn_commit++; + } + ptvi->status = TXN_STAT_COMMIT; + DB_ASSERT(env, IS_ZERO_LSN(ptvi->last_lsn)); + ptvi->last_lsn = *lsnp; + if ((ret = __put_txn_vrfy_info(lvh, ptvi)) != 0) + goto err; + + /* Report txn stats.
*/ + if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE)) + __db_msg(env, DB_STR_A("2548", + "[%lu][%lu] The number of active, committed and aborted " + "child txns of txn %lx: %u, %u, %u.", + "%lu %lu %lx %u %u %u"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)ptvi->txnid, + ptvi->nchild_active, ptvi->nchild_commit, + ptvi->nchild_abort); +out: +err: + + if (pptvi != NULL && (ret2 = __free_txninfo(pptvi)) != 0 && ret == 0) + ret = ret2; + if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0) + ret = ret2; + __os_free(env, argp); + + return (ret); +} + +/* + * __txn_ckp_42_verify -- + * Read dbtp->data with __txn_ckp_42_read and pass argp->type to + * ON_NOT_SUPPORTED; argp is freed at err. + * + * PUBLIC: int __txn_ckp_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_ckp_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_ckp_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_ckp_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * __txn_ckp_verify -- + * Verify a __txn_ckp record read with __txn_ckp_read. With + * DB_LOG_VERIFY_FORWARD set it only stores a timestamp entry and, when + * the start_lsn test allows, lvh->valid_lsn. Otherwise it counts and + * reports the checkpoint, compares it with the last stored checkpoint + * (LSN and timestamp), runs __lv_ckp_vrfy_handler over the txn info + * and stores this checkpoint with __put_ckp_info. + * + * Once the record has been read, the paths visible here leave through + * the err label, which frees argp and lastckp. + * + * PUBLIC: int __txn_ckp_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_ckp_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_ckp_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_CKP_INFO *lastckp, ckpinfo; + int ret; + struct __ckp_verify_params cvp; + VRFY_TIMESTAMP_INFO tsinfo; + /* + * Two buffers: message 2551 formats two different times in a single + * call, and __os_ctime is handed the buffer to fill. + */ + char timebuf[CTIME_BUFLEN], lastckp_timebuf[CTIME_BUFLEN]; + time_t ckp_time, lastckp_time; + + lastckp = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + memset(&ckpinfo, 0, sizeof(ckpinfo)); + memset(&cvp, 0, sizeof(cvp)); + + if ((ret = __txn_ckp_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + tsinfo.lsn = 
*lsnp; + tsinfo.timestamp = argp->timestamp; + tsinfo.logtype = argp->type; + /* + * Store the first ckp_lsn, or the least one greater than the + * starting point. There will be no partial txns after + * valid_lsn. + */ + if (!(!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + LOG_COMPARE(&(lvh->lv_config->start_lsn), + &(argp->ckp_lsn)) > 0)) + lvh->valid_lsn = argp->ckp_lsn; + if ((ret = __put_timestamp_info(lvh, &tsinfo)) != 0) + goto err; + goto out;/* We are done, exit. */ + } + lvh->nckp++; + ckp_time = (time_t)argp->timestamp; + __db_msg(env, DB_STR_A("2549", + "[%lu][%lu] Checkpoint record, ckp_lsn: [%lu][%lu], " + "timestamp: %s. Total checkpoint: %u", + "%lu %lu %lu %lu %s %u"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->ckp_lsn.file, + (u_long)argp->ckp_lsn.offset, + __os_ctime(&ckp_time, timebuf), lvh->nckp); + + if ((ret = __lv_on_timestamp(lvh, lsnp, + argp->timestamp, DB___txn_ckp)) != 0) + goto err; + /* Leave through err, not return, so that argp is freed on failure. */ + if (((ret = __get_last_ckp_info(lvh, &lastckp)) != 0) && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND) + goto cont; /* No earlier checkpoint stored: nothing to compare with. */ + + if (LOG_COMPARE(&(argp->last_ckp), &(lastckp->lsn)) != 0) { + __db_errx(env, DB_STR_A("2550", + "[%lu][%lu] Last known checkpoint [%lu][%lu] not equal " + "to last_ckp :[%lu][%lu]. Some checkpoint log records " + "may be missing.", "%lu %lu %lu %lu %lu %lu"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)lastckp->lsn.file, (u_long)lastckp->lsn.offset, + (u_long)argp->last_ckp.file, (u_long)argp->last_ckp.offset); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + /* + * Checkpoint are generally not performed quite often, so we see this + * as an error, but in txn commits we see it as a warning. 
+ */ + lastckp_time = (time_t)lastckp->timestamp; + if (argp->timestamp < lastckp->timestamp) { + __db_errx(env, DB_STR_A("2551", + "[%lu][%lu] Last known checkpoint [%lu, %lu] has a " + "timestamp %s smaller than this checkpoint timestamp %s.", + "%lu %lu %lu %lu %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)lastckp->lsn.file, + (u_long)lastckp->lsn.offset, + __os_ctime(&lastckp_time, lastckp_timebuf), + __os_ctime(&ckp_time, timebuf)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + +cont: + cvp.env = env; + cvp.lsn = *lsnp; + cvp.ckp_lsn = argp->ckp_lsn; + + /* + * Verify that all active txn's first lsn is greater than + * argp->ckp_lsn. + */ + if ((ret = __iterate_txninfo(lvh, 0, 0, + __lv_ckp_vrfy_handler, &cvp)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + ckpinfo.timestamp = argp->timestamp; + ckpinfo.lsn = *lsnp; + ckpinfo.ckplsn = argp->ckp_lsn; + + if ((ret = __put_ckp_info(lvh, &ckpinfo)) != 0) + goto err; +out: +err: + if (argp) + __os_free(env, argp); + if (lastckp) + __os_free(env, lastckp); + return (ret); +} + +/* + * __lv_ckp_vrfy_handler -- + * Callback handed to __iterate_txninfo by __txn_ckp_verify; param is the + * struct __ckp_verify_params filled in there. Reports a TXN_STAT_ACTIVE + * txn whose first_lsn is not greater than ckp_lsn and sets + * DB_LOG_VERIFY_ERR in lvinfo->flags. + */ +static int +__lv_ckp_vrfy_handler(lvinfo, txninfop, param) + DB_LOG_VRFY_INFO *lvinfo; + VRFY_TXN_INFO *txninfop; + void *param; +{ + struct __ckp_verify_params *cvp; + int ret; + + ret = 0; + cvp = (struct __ckp_verify_params *)param; + /* ckp_lsn should be less than any active txn's first lsn. */ + if (txninfop->status == TXN_STAT_ACTIVE && LOG_COMPARE(&(cvp->ckp_lsn), + &(txninfop->first_lsn)) >= 0) { + __db_errx(cvp->env, DB_STR_A("2552", + "[%lu][%lu] ckp log's ckp_lsn [%lu][%lu] greater than " + "active txn %lx 's first lsn [%lu][%lu]", + "%lu %lu %lu %lu %lx %lu %lu"), + (u_long)cvp->lsn.file, (u_long)cvp->lsn.offset, + (u_long)cvp->ckp_lsn.file, (u_long)cvp->ckp_lsn.offset, + (u_long)txninfop->txnid, + (u_long)txninfop->first_lsn.file, + (u_long)txninfop->first_lsn.offset); + lvinfo->flags |= DB_LOG_VERIFY_ERR; + if (!F_ISSET(lvinfo, DB_LOG_VERIFY_CAF)) + /* Stop the iteration. 
*/ + ret = DB_LOG_VERIFY_BAD; + } + + return (ret); +} + +/* + * PUBLIC: int __txn_child_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_child_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_child_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_TXN_INFO *ptvi, *ptvi2; + int ret, ret2, started; + + /* + * This function is called when a txn T0's child txn T1 commits. Before + * this log record we don't know T0 and T1's relationship. This means + * we never know the T0 has an active child txn T1, all child txns + * we know are committed. + */ + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + ptvi = ptvi2 = NULL; + ret = ret2 = started = 0; + + if ((ret = __txn_child_read(env, dbtp->data, &argp)) != 0) + return (ret); + + /* + * The __lv_log_fwdscr_oncmt call must precede LOG_VRFY_PROC otherwise + * this txn will be taken as an aborted txn. + */ + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + if ((ret = __lv_log_fwdscr_oncmt(lvh, argp->c_lsn, argp->child, + argp->txnp->txnid, 0)) != 0) + goto err; + if ((ret = __lv_log_fwdscr_onrec(lvh, argp->txnp->txnid, + argp->type, argp->prev_lsn, *lsnp)) != 0) + goto err; + goto out;/* We are done. */ + } + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if ((ret = __return_txn_pages(lvh, argp->child, + argp->txnp->txnid)) != 0 && ret != DB_NOTFOUND) + goto err;/* Some txns may have updated no pages. */ + + /* Update parent txn info. 
*/ + if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->txnp->txnid, &started)) == 0) && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2553", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + ptvi->nchild_commit++; + /* + * The start of this child txn caused lvh->ntxn_active to be + * incremented unnecessarily, so decrement it. + */ + lvh->ntxn_active--; + if (ptvi->status != TXN_STAT_ACTIVE) { + __db_errx(lvh->dbenv->env, DB_STR_A("2554", + "[%lu][%lu] Parent txn %lx ended " + "before child txn %lx ends.", "%lu %lu %lx %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid, (u_long)argp->child); + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + if ((ret = __put_txn_vrfy_info(lvh, ptvi)) != 0) + goto err; + + /* Update child txn info. 
*/ + if ((ret = __get_txn_vrfy_info(lvh, argp->child, &ptvi2)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->child, &started)) == 0) && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2555", + "[%lu][%lu] Can not find an active " + "transaction's information, txnid: %lx.", + "%lu %lu %lx"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->child); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + if (ptvi2 == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + if (ptvi2->status != TXN_STAT_ACTIVE) { + __db_errx(lvh->dbenv->env, DB_STR_A("2556", + "[%lu][%lu] Txn %lx ended before it commits.", + "%lu %lu %lx"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->child); + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + ptvi2->status = TXN_STAT_COMMIT; + if ((ret = __put_txn_vrfy_info(lvh, ptvi2)) != 0) + goto err; +out: +err: + __os_free(env, argp); + if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0) + ret = ret2; + if (ptvi2 != NULL && (ret2 = __free_txninfo(ptvi2)) != 0 && ret == 0) + ret = ret2; + + return (ret); +} + +/* + * PUBLIC: int __txn_xa_regop_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_xa_regop_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_xa_regop_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_xa_regop_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + __os_free(env, argp); + + return (ret); +} + +/* + * 
PUBLIC: int __txn_prepare_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_prepare_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_prepare_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_TXN_INFO *ptvi; + int ret, ret2, started; + + ret = ret2 = started = 0; + ptvi = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_prepare_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + + if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 && + ret != DB_NOTFOUND) + goto err; + + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->txnp->txnid, &started)) == 0) && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2557", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + DB_ASSERT(env, + (IS_ZERO_LSN(ptvi->prep_lsn) && ptvi->status != TXN_STAT_PREPARE) || + (!IS_ZERO_LSN(ptvi->prep_lsn) && ptvi->status == TXN_STAT_PREPARE)); + + lvh->ntxn_prep++; + lvh->ntxn_active--; + + if (!IS_ZERO_LSN(ptvi->prep_lsn)) {/* Prepared more than once. 
*/ + + __db_errx(lvh->dbenv->env, DB_STR_A("2558", + "[%lu][%lu] Multiple txn_prepare log record for " + "transaction %lx, previous prepare lsn: [%lu, %lu].", + "%lu %lu %lx %lu %lu"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->txnp->txnid, + (u_long)ptvi->prep_lsn.file, (u_long)ptvi->prep_lsn.offset); + } else { + ptvi->prep_lsn = *lsnp; + ptvi->status = TXN_STAT_PREPARE; + } + ret = __put_txn_vrfy_info(lvh, ptvi); +out: +err: + __os_free(env, argp); + if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0) + ret = ret2; + return (ret); +} + +/* + * PUBLIC: int __txn_recycle_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_recycle_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_recycle_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + ret = 0; + + if ((ret = __txn_recycle_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + + /* Add recycle info for all txns whose ID is in the [min, max] range. */ + ret = __add_recycle_lsn_range(lvh, lsnp, argp->min, argp->max); + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* Handle log types having timestamps, so far only __txn_ckp and __txn_regop. 
*/ +static int +__lv_on_timestamp(lvh, lsn, timestamp, logtype) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsn; + int32_t timestamp; + u_int32_t logtype; +{ + VRFY_TIMESTAMP_INFO *ltsinfo; + int ret; + + ltsinfo = NULL; + ret = 0; + if ((ret = __get_latest_timestamp_info(lvh, *lsn, <sinfo)) == 0) { + DB_ASSERT(lvh->dbenv->env, ltsinfo != NULL); + if (ltsinfo->timestamp >= timestamp && + F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE)) { + __db_errx(lvh->dbenv->env, DB_STR_A("2559", + "[%lu][%lu] [WARNING] This log record of type %s " + "does not have a greater time stamp than " + "[%lu, %lu] of type %s", "%lu %lu %s %lu %lu %s"), + (u_long)lsn->file, (u_long)lsn->offset, + LOGTYPE_NAME(lvh, logtype), + (u_long)ltsinfo->lsn.file, + (u_long)ltsinfo->lsn.offset, + LOGTYPE_NAME(lvh, ltsinfo->logtype)); + lvh->flags |= DB_LOG_VERIFY_WARNING; + } + } + if (ltsinfo != NULL) + __os_free(lvh->dbenv->env, ltsinfo); + if (ret == DB_NOTFOUND) + ret = 0; + + return (ret); +} + +/* + * Called whenever the log record belongs to a transaction. + */ +static int +__lv_on_txn_logrec(lvh, lsnp, prev_lsnp, txnp, type, dbregid) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; + const DB_LSN *prev_lsnp; + const DB_TXN *txnp; + u_int32_t type; + int32_t dbregid; +{ + DBT fid; + VRFY_TXN_INFO *pvti; + u_int32_t txnid; + VRFY_FILEREG_INFO *fregp; + int ret, ret2, started; + + ret = ret2 = started = 0; + pvti = NULL; + fregp = NULL; + lvh->lrtypes[type]++;/* Increment per-type log record count. */ + txnid = txnp->txnid; + memset(&fid, 0, sizeof(fid)); + + if (dbregid == INVAL_DBREGID) + goto cont; + if ((ret = __get_filereg_by_dbregid(lvh, dbregid, &fregp)) != 0) { + if (ret == DB_NOTFOUND) { + /* + * It's likely that we are verifying a subset of logs + * and the DBREG_OPEN is outside the range. 
+ */ + if (!F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + __db_msg(lvh->dbenv->env, DB_STR_A("2560", + "[%lu][%lu] Transaction %lx is updating a " + "db file %d not registered.", + "%lu %lu %lx %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)txnp->txnid, dbregid); + goto cont; + } else + goto err; + } + + fid = fregp->fileid; +cont: + if (IS_ZERO_LSN(*prev_lsnp) && + (ret = __lv_on_new_txn(lvh, lsnp, txnp, type, dbregid, &fid)) != 0) + goto err; + + if ((ret = __get_txn_vrfy_info(lvh, txnid, &pvti)) != 0 && + ret != DB_NOTFOUND) + goto err; + + /* If can't find the txn, there is an internal error. */ + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + /* + * If verifying from middle, it's expected that txns begun + * before start are not found. + */ + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && ((ret2 = + __txn_started(lvh, lvh->lv_config->start_lsn, txnid, + &started)) == 0) && started != 0) { + ret = 0; + goto out;/* We are done. */ + } + if (ret2 != 0) + ret = ret2; + + __db_errx(lvh->dbenv->env, DB_STR_A("2561", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + } + + /* Can't proceed without the txn info. */ + if (pvti == NULL) { + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + } + + /* Check if prev lsn is wrong, and some log records may be missing. 
*/ + if (!IS_ZERO_LSN(*prev_lsnp) && + LOG_COMPARE(prev_lsnp, &(pvti->cur_lsn)) != 0) { + __db_errx(lvh->dbenv->env, DB_STR_A("2562", + "[%lu][%lu] Previous record for transaction %lx is " + "[%lu][%lu] and prev_lsn is [%lu][%lu].", + "%lu %lu %lx %lu %lu %lu %lu"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)pvti->txnid, + (u_long)pvti->cur_lsn.file, (u_long)pvti->cur_lsn.offset, + (u_long)prev_lsnp->file, (u_long)prev_lsnp->offset); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + /* + * After the txn is prepared, the only valid log record for this txn + * is the commit record. + */ + if (pvti->status == TXN_STAT_PREPARE && type != DB___txn_regop) { + __db_errx(lvh->dbenv->env, DB_STR_A("2563", + "[%lu][%lu] Update action is performed in a " + "prepared transaction %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)txnid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + pvti->cur_lsn = *lsnp; + pvti->flags = txnp->flags; + if (dbregid != INVAL_DBREGID && fid.size > 0 && + (ret = __add_file_updated(pvti, &fid, dbregid)) != 0) + goto err; + if ((ret = __put_txn_vrfy_info(lvh, pvti)) != 0) + goto err; +out: +err: + if (pvti != NULL && (ret2 = __free_txninfo(pvti)) != 0 && ret == 0) + ret = ret2; + if (fregp != NULL && + (ret2 = __free_filereg_info(fregp)) != 0 && ret == 0) + ret = ret2; + return (ret); +} + +/* + * Called whenever a new transaction is started, including child transactions. + */ +static int +__lv_on_new_txn (lvh, lsnp, txnp, type, dbregid, fid) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; + const DB_TXN *txnp; + u_int32_t type; + int32_t dbregid; + const DBT *fid; +{ + VRFY_TXN_INFO vti, *pvti, *vtip; + int ret, tret; + u_int32_t txnid; + ENV *env; + + ret = tret = 0; + txnid = txnp->txnid; + pvti = NULL; + memset(&vti, 0, sizeof(vti)); + vti.txnid = txnid; + env = lvh->dbenv->env; + /* Log record type, may be used later. Pass lint checks. 
*/ + COMPQUIET(type, 0); + + /* + * It's possible that the new txn is a child txn, we will decrement + * this value in __txn_child_verify when we realize this, because + * this value only records the number of outermost active txns. + */ + lvh->ntxn_active++; + + if ((ret = __get_txn_vrfy_info(lvh, txnid, &pvti)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND) + vtip = &vti; + else {/* The txnid is reused, may be illegal. */ + vtip = pvti; + /* + * If this txn id was recycled, this use is legal. A legal + * recyclable txnid is immediately not recyclable after + * it's recycled here. And it's impossible for vtip->status + * to be TXN_STAT_ACTIVE, since we have made it TXN_STAT_ABORT + * when we detected this txn id recycle just now. + */ + if (vtip->num_recycle > 0 && LOG_COMPARE(&(vtip->recycle_lsns + [vtip->num_recycle - 1]), lsnp) < 0) { + DB_ASSERT(env, vtip->status != TXN_STAT_ACTIVE); + if ((ret = __rem_last_recycle_lsn(vtip)) != 0) + goto err; + if ((ret = __clear_fileups(vtip)) != 0) + goto err; + + vtip->status = 0; + ZERO_LSN(vtip->prep_lsn); + ZERO_LSN(vtip->last_lsn); + + vtip->nchild_active = 0; + vtip->nchild_commit = 0; + vtip->nchild_abort = 0; + /* + * We may goto the else branch if this txn has child txns + * before any updates done on its behalf. So we should + * exclude this possibility to conclude a failed verification. + */ + } else if (vtip->nchild_active + vtip->nchild_commit + + vtip->nchild_abort == 0) { + __db_errx(lvh->dbenv->env, DB_STR_A("2564", + "[%lu][%lu] Transaction id %lx reused without " + "being recycled with a __txn_recycle.", + "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)txnid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + } + + vtip->first_lsn = *lsnp; + vtip->cur_lsn = *lsnp; + vtip->flags = txnp->flags; + + /* + * It's possible that the first log rec does not update any file, + * like the __txn_child type of record. 
+ */ + if (fid->size > 0 && (ret = + __add_file_updated(vtip, fid, dbregid)) != 0) + goto err; + if ((ret = __put_txn_vrfy_info(lvh, vtip)) != 0) + goto err; + +err: + if (pvti != NULL && (tret = __free_txninfo(pvti)) != 0 && ret == 0) + ret = tret; + if ((tret = __free_txninfo_stack(&vti)) != 0 && ret == 0) + ret = tret; + + return (ret); +} + +/* Called when we detect that a new log file is used. */ +static int +__lv_new_logfile_vrfy(lvh, lsnp) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; +{ + int ret; + + ret = 0; + if (IS_ZERO_LSN(lvh->last_lsn) || lvh->last_lsn.file == lsnp->file) { + lvh->last_lsn = *lsnp; + return (0); + } + + /* + * If file number changed, it must have been incremented, + * and the offset is 0. + * */ + if (lsnp->file - lvh->last_lsn.file != 1 || lsnp->offset != + __lv_first_offset(lvh->dbenv->env)) { + __db_errx(lvh->dbenv->env, + "[%lu][%lu] Last log record verified ([%lu][%lu]) is not " + "immidiately before the current log record.", + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)lvh->last_lsn.file, (u_long)lvh->last_lsn.offset); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + lvh->last_lsn = *lsnp; +err: + return (ret); +} + +static u_int32_t +__lv_first_offset(env) + ENV *env; +{ + u_int32_t sz; + + if (CRYPTO_ON(env)) + sz = HDR_CRYPTO_SZ; + else + sz = HDR_NORMAL_SZ; + + sz += sizeof(LOGP); + + return sz; +} + +/* Called when we see a non-transactional update log record. 
*/ +static int +__lv_on_nontxn_update(lvh, lsnp, txnid, logtype, fileid) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; + u_int32_t txnid, logtype; + int32_t fileid; +{ + lvh->lrtypes[logtype]++; + COMPQUIET(txnid, 0); + if (fileid != INVAL_DBREGID) { + lvh->non_txnup_cnt++; + __db_msg(lvh->dbenv->env, DB_STR_A("2565", + "[%lu][%lu] Non-transactional update, " + "log type: %u, fileid: %d.", "%lu %lu %u %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, logtype, fileid); + } + + return (0); +} + +static int +__lv_on_txn_aborted(lvinfo) + DB_LOG_VRFY_INFO *lvinfo; +{ + int ret, ret2, sres; + VRFY_TXN_INFO *ptvi; + u_int32_t abtid; + DB_LSN lsn, slsn; + + ret = ret2 = sres = 0; + abtid = lvinfo->aborted_txnid; + lsn = lvinfo->aborted_txnlsn; + slsn = lvinfo->lv_config->start_lsn; + ptvi = NULL; + + if ((ret = __del_txn_pages(lvinfo, lvinfo->aborted_txnid)) != 0 && + ret != DB_NOTFOUND) + goto err;/* Some txns may have updated no pages. */ + ret = __get_txn_vrfy_info(lvinfo, lvinfo->aborted_txnid, &ptvi); + if (ret == DB_NOTFOUND && !F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) { + /* + * If verifying from slsn and the txn abtid started before + * slsn, it's expected that we can't find the txn. + */ + if (!IS_ZERO_LSN(slsn) && (ret2 = __txn_started(lvinfo, slsn, + abtid, &sres)) == 0 && sres != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2;/* Use the same error msg below. */ + __db_errx(lvinfo->dbenv->env, DB_STR_A("2566", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsn.file, (u_long)lsn.offset, + (u_long)lvinfo->aborted_txnid); + ON_ERROR(lvinfo, DB_LOG_VERIFY_INTERR); + } + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + } + ptvi->status = TXN_STAT_ABORT; + lvinfo->ntxn_abort++; + lvinfo->ntxn_active--; + /* Report txn stats. 
*/ + if (F_ISSET(lvinfo, DB_LOG_VERIFY_VERBOSE)) { + __db_msg(lvinfo->dbenv->env, DB_STR_A("2567", + "[%lu][%lu] Txn %lx aborted after this log record.", + "%lu %lu %lx"), (u_long)lvinfo->aborted_txnlsn.file, + (u_long)lvinfo->aborted_txnlsn.offset, (u_long)ptvi->txnid); + __db_msg(lvinfo->dbenv->env, DB_STR_A("2568", + "\tThe number of active, committed and aborted child txns " + "of txn %lx: %u, %u, %u.", "%lx %u %u %u"), + (u_long)ptvi->txnid, ptvi->nchild_active, + ptvi->nchild_commit, ptvi->nchild_abort); + } + lvinfo->aborted_txnid = 0; + lvinfo->aborted_txnlsn.file = lvinfo->aborted_txnlsn.offset = 0; + if ((ret = __put_txn_vrfy_info(lvinfo, ptvi)) != 0) + goto err; + if ((ret = __free_txninfo(ptvi)) != 0) + goto err; +out: +err: + return (ret); +} diff --git a/src/log/log_verify_stub.c b/src/log/log_verify_stub.c new file mode 100644 index 00000000..e6589a50 --- /dev/null +++ b/src/log/log_verify_stub.c @@ -0,0 +1,79 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef HAVE_VERIFY + +#include "db_config.h" +#include "db_int.h" + +static int __db_log_novrfy __P((ENV *)); +int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); +int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); +int __log_verify_wrap __P((ENV *env, const char *, u_int32_t, const char *, + const char *, time_t, time_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t, + int, int)); + +/* + * __db_log_novrfy -- + * Error when a Berkeley DB build doesn't include the access method. + */ +static int +__db_log_novrfy(env) + ENV *env; +{ + __db_errx(env, DB_STR("2523", + "library build did not include support for log verification")); + return (DB_OPNOTSUP); +} + +int +__log_verify_pp(dbenv, lvconfig) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; +{ + COMPQUIET(lvconfig, NULL); + + /* The dbenv is intact, callers should properly take care of it. 
*/ + return (__db_log_novrfy(dbenv->env)); +} + +int +__log_verify(dbenv, lvconfig) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; +{ + COMPQUIET(lvconfig, NULL); + + return (__db_log_novrfy(dbenv->env)); +} + +int +__log_verify_wrap(env, envhome, cachesize, dbfile, dbname, + stime, etime, stfile, stoffset, efile, eoffset, caf, verbose) + ENV *env; + const char *envhome, *dbfile, *dbname; + time_t stime, etime; + u_int32_t cachesize, stfile, stoffset, efile, eoffset; + int caf, verbose; +{ + COMPQUIET(envhome, NULL); + COMPQUIET(dbfile, NULL); + COMPQUIET(dbname, NULL); + COMPQUIET(stime, 0); + COMPQUIET(etime, 0); + COMPQUIET(cachesize, 0); + COMPQUIET(stfile, 0); + COMPQUIET(stoffset, 0); + COMPQUIET(efile, 0); + COMPQUIET(eoffset, 0); + COMPQUIET(caf, 0); + COMPQUIET(verbose, 0); + return (__db_log_novrfy(env)); +} + +#endif /* !HAVE_VERIFY */ diff --git a/src/log/log_verify_util.c b/src/log/log_verify_util.c new file mode 100644 index 00000000..88682921 --- /dev/null +++ b/src/log/log_verify_util.c @@ -0,0 +1,2234 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * This file contains helper functions like data structure and in-memory db + * management, which are used to store various log verification information. 
+ */ +#include "db_config.h" +#include "db_int.h" + +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/qam.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" +#include "dbinc/fop.h" + +#include "dbinc/log_verify.h" + +#define BDBOP(op) do { \ + ret = (op); \ + if (ret != 0) { \ + __lv_on_bdbop_err(ret); \ + goto err; \ + } \ +} while (0) + +#define BDBOP2(dbenv, op, funct) do { \ + ret = (op); \ + if (ret != 0) { \ + __lv_on_bdbop_err(ret); \ + __db_err(dbenv->env, ret, "\n%s", funct); \ + return (ret); \ + } \ +} while (0) + +#define BDBOP3(dbenv, op, excpt, funct) do { \ + ret = (op); \ + if (ret != 0) { \ + __lv_on_bdbop_err(ret); \ + if (ret != excpt) { \ + __db_err(dbenv->env, ret, "\n%s", funct); \ + return (ret); \ + } \ + } \ +} while (0) + +typedef int (*btcmp_funct)(DB *, const DBT *, const DBT *); +typedef int (*dupcmp_funct)(DB *, const DBT *, const DBT *); + +static int __lv_add_recycle_handler __P(( + DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, void *)); +static int __lv_add_recycle_lsn __P((VRFY_TXN_INFO *, const DB_LSN *)); +static size_t __lv_dbt_arrsz __P((const DBT *, u_int32_t)); +static int __lv_fidpgno_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_i32_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_lsn_cmp __P((DB *, const DBT *, const DBT *)); +static void __lv_on_bdbop_err __P((int)); +static int __lv_open_db __P((DB_ENV *, DB **, DB_THREAD_INFO *, + const char *, int, btcmp_funct, u_int32_t, dupcmp_funct)); +static int __lv_pack_filereg __P((const VRFY_FILEREG_INFO *, DBT *)); +static int __lv_pack_txn_vrfy_info __P(( + const VRFY_TXN_INFO *, DBT *, DBT *data)); +static int __lv_seccbk_fname __P((DB *, const DBT *, const DBT *, DBT *)); +static int __lv_seccbk_lsn __P((DB *, const DBT *, const DBT *, DBT *)); +static int __lv_seccbk_txnpg __P((DB *, const DBT *, const DBT *, DBT *)); +static void __lv_setup_logtype_names 
__P((DB_LOG_VRFY_INFO *lvinfo)); +static int __lv_txnrgns_lsn_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_ui32_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_unpack_txn_vrfy_info __P((VRFY_TXN_INFO **, const DBT *)); +static int __lv_unpack_filereg __P((const DBT *, VRFY_FILEREG_INFO **)); + +static void __lv_on_bdbop_err(ret) + int ret; +{ + /* Pass lint checks. We need the ret and this function for debugging. */ + COMPQUIET(ret, 0); +} + +/* + * __create_log_vrfy_info -- + * Initialize and return a log verification handle to be used throughout + * a verification process. + * + * PUBLIC: int __create_log_vrfy_info __P((const DB_LOG_VERIFY_CONFIG *, + * PUBLIC: DB_LOG_VRFY_INFO **, DB_THREAD_INFO *)); + */ +int +__create_log_vrfy_info(cfg, lvinfopp, ip) + const DB_LOG_VERIFY_CONFIG *cfg; + DB_LOG_VRFY_INFO **lvinfopp; + DB_THREAD_INFO *ip; +{ + const char *envhome; + int inmem, ret; + u_int32_t cachesz, envflags; + const char *dbf1, *dbf2, *dbf3, *dbf4, *dbf5, *dbf6, *dbf7, *dbf8, + *dbf9, *dbf10, *dbf11; + DB_LOG_VRFY_INFO *lvinfop; + + dbf1 = "__db_log_vrfy_txninfo.db"; + dbf2 = "__db_log_vrfy_fileregs.db"; + dbf3 = "__db_log_vrfy_pgtxn.db"; + dbf4 = "__db_log_vrfy_lsntime.db"; + dbf5 = "__db_log_vrfy_timelsn.db"; + dbf6 = "__db_log_vrfy_ckps.db"; + dbf7 = "__db_log_vrfy_dbregids.db"; + dbf8 = "__db_log_vrfy_fnameuid.db"; + dbf9 = "__db_log_vrfy_timerange.db"; + dbf10 = "__db_log_vrfy_txnaborts.db"; + dbf11 = "__db_log_vrfy_txnpg.db"; + + envhome = cfg->temp_envhome; + lvinfop = NULL; + cachesz = cfg->cachesize; + if (cachesz== 0) + cachesz = 1024 * 1024 * 256; + + BDBOP(__os_malloc(NULL, sizeof(DB_LOG_VRFY_INFO), &lvinfop)); + memset(lvinfop, 0, sizeof(DB_LOG_VRFY_INFO)); + lvinfop->ip = ip; + __lv_setup_logtype_names(lvinfop); + /* Avoid the VERIFY_PARTIAL bit being cleared if no ckp_lsn exists. 
*/ + lvinfop->valid_lsn.file = lvinfop->valid_lsn.offset = (u_int32_t)-1; + + /* + * The envhome parameter determines if we will use an in-memory + * environment and databases. + */ + if (envhome == NULL) { + envflags = DB_PRIVATE; + inmem = 1; + } else { + envflags = 0; + inmem = 0; + } + + /* Create log verify internal database environment. */ + BDBOP(db_env_create(&lvinfop->dbenv, 0)); + BDBOP(__memp_set_cachesize(lvinfop->dbenv, 0, cachesz, 1)); + /* + * Log verification internal db environment should be accessed + * single-threaded. No transaction semantics needed. + */ + BDBOP(__env_open(lvinfop->dbenv, envhome, + envflags | DB_CREATE | DB_INIT_MPOOL, 0666)); + + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txninfo, ip, dbf1, + inmem, __lv_ui32_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->fileregs, ip, dbf2, + inmem, NULL, 0, NULL)); + + /* No dup allowed, always overwrite data with same key. */ + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->dbregids, ip, dbf7, + inmem, __lv_i32_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->pgtxn, ip, dbf3, + inmem, __lv_fidpgno_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnpg, ip, dbf11, + inmem, __lv_ui32_cmp, DB_DUP | DB_DUPSORT, __lv_fidpgno_cmp)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->lsntime, ip, dbf4, + inmem, __lv_lsn_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->timelsn, ip, dbf5, + inmem, __lv_i32_cmp, DB_DUP | DB_DUPSORT, __lv_lsn_cmp)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnaborts, ip, dbf10, + inmem, __lv_lsn_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->ckps, ip, dbf6, + inmem, __lv_lsn_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->fnameuid, ip, dbf8, + inmem, NULL, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnrngs, ip, dbf9, + inmem, __lv_ui32_cmp, DB_DUP | DB_DUPSORT, __lv_txnrgns_lsn_cmp)); + + BDBOP(__db_associate(lvinfop->lsntime, ip, NULL, + lvinfop->timelsn, 
__lv_seccbk_lsn, DB_CREATE)); + BDBOP(__db_associate(lvinfop->fileregs, ip, NULL, + lvinfop->fnameuid, __lv_seccbk_fname, DB_CREATE)); + BDBOP(__db_associate(lvinfop->pgtxn, ip, NULL, + lvinfop->txnpg, __lv_seccbk_txnpg, DB_CREATE)); + + *lvinfopp = lvinfop; + + return (0); +err: + if (lvinfop->dbenv && ret != 0) + __db_err(lvinfop->dbenv->env, ret, "__create_log_vrfy_info"); + (void)__destroy_log_vrfy_info(lvinfop); + + return (ret); +} + +/* + * __destroy_log_vrfy_info -- + * Destroy and free a log verification handle. + * + * PUBLIC: int __destroy_log_vrfy_info __P((DB_LOG_VRFY_INFO *)); + */ +int +__destroy_log_vrfy_info(lvinfop) + DB_LOG_VRFY_INFO *lvinfop; +{ + int ret; + + ret = 0; + if (lvinfop == NULL) + return (0); + + if (lvinfop->txnaborts != NULL && + (ret = __db_close(lvinfop->txnaborts, NULL, 0)) != 0) + goto err; + if (lvinfop->txninfo != NULL && + (ret = __db_close(lvinfop->txninfo, NULL, 0)) != 0) + goto err; + if (lvinfop->dbregids != NULL && + (ret = __db_close(lvinfop->dbregids, NULL, 0)) != 0) + goto err; + if (lvinfop->fileregs != NULL && + (ret = __db_close(lvinfop->fileregs, NULL, 0)) != 0) + goto err; + if (lvinfop->pgtxn != NULL && + (ret = __db_close(lvinfop->pgtxn, NULL, 0)) != 0) + goto err; + if (lvinfop->lsntime != NULL && + (ret = __db_close(lvinfop->lsntime, NULL, 0)) != 0) + goto err; + if (lvinfop->ckps != NULL && + (ret = __db_close(lvinfop->ckps, NULL, 0)) != 0) + goto err; + if (lvinfop->txnrngs != NULL && + (ret = __db_close(lvinfop->txnrngs, NULL, 0)) != 0) + goto err; + if (lvinfop->fnameuid != NULL && + (ret = __db_close(lvinfop->fnameuid, NULL, 0)) != 0) + goto err; + if (lvinfop->timelsn != NULL && + (ret = __db_close(lvinfop->timelsn, NULL, 0)) != 0) + goto err; + if (lvinfop->txnpg != NULL && + (ret = __db_close(lvinfop->txnpg, NULL, 0)) != 0) + goto err; + if (lvinfop->dbenv != NULL && + (ret = __env_close(lvinfop->dbenv, 0)) != 0) + goto err; +err: + __os_free(NULL, lvinfop); + + return (ret); +} + +/* Secondary 
index callback function for DB_LOG_VRFY_INFO->timelsn. */ +static int +__lv_seccbk_fname(secdb, key, data, result) + DB *secdb; + const DBT *key; + const DBT *data; + DBT *result; +{ + int ret, tret; + VRFY_FILEREG_INFO *freg; + char *buf; + size_t buflen, slen; + + ret = tret = 0; + COMPQUIET(key, NULL); + if ((ret = __lv_unpack_filereg(data, &freg)) != 0) + goto out; + if (freg->fname == NULL || (slen = strlen(freg->fname)) == 0) { + ret = DB_DONOTINDEX; + goto out; + } + + buflen = (slen + 1) * sizeof(char); + if ((ret = __os_umalloc(secdb->dbenv->env, buflen, &buf)) != 0) + goto out; + (void)strcpy(buf, freg->fname); + result->size = (u_int32_t)buflen; + result->flags |= DB_DBT_APPMALLOC; + result->data = buf; +out: + if (freg != NULL && (tret = __free_filereg_info(freg)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* Secondary index callback function for DB_LOG_VRFY_INFO->txnpg. */ +static int +__lv_seccbk_txnpg(secdb, key, data, result) + DB *secdb; + const DBT *key; + const DBT *data; + DBT *result; +{ + COMPQUIET(key, NULL); + COMPQUIET(secdb, NULL); + /* Txnid is the secondary key, and it's all the data dbt has. */ + result->data = data->data; + result->size = data->size; + + return (0); +} + +/* Secondary index callback function for DB_LOG_VRFY_INFO->timelsn. */ +static int +__lv_seccbk_lsn(secdb, key, data, result) + DB *secdb; + const DBT *key; + const DBT *data; + DBT *result; +{ + VRFY_TIMESTAMP_INFO *lvti; + + COMPQUIET(key, NULL); + COMPQUIET(secdb, NULL); + + lvti = (VRFY_TIMESTAMP_INFO *)data->data; + result->data = &(lvti->timestamp); + result->size = sizeof(lvti->timestamp); + + return (0); +} + +/* + * Open a BTREE database handle, optionally set the btree compare function + * and flags if any. 
+ */ +static int +__lv_open_db(dbenv, dbpp, ip, name, inmem, cmpf, sflags, dupcmpf) + DB_ENV *dbenv; + DB **dbpp; + const char *name; + int inmem; + btcmp_funct cmpf; + u_int32_t sflags; + dupcmp_funct dupcmpf; + DB_THREAD_INFO *ip; +{ + int ret; + const char *dbfname, *dbname; + DB *dbp; + + dbp = NULL; + ret = 0; + if (inmem) { + dbfname = NULL; + dbname = name; + } else { + dbfname = name; + dbname = NULL; + } + + BDBOP(db_create(&dbp, dbenv, 0)); + + if (cmpf != NULL) + BDBOP(__bam_set_bt_compare(dbp, cmpf)); + if (dupcmpf != NULL) + dbp->dup_compare = dupcmpf; + if (sflags != 0) + BDBOP(__db_set_flags(dbp, sflags)); + /* No concurrency needed, a big page size reduces overflow pages. */ + BDBOP(__db_set_pagesize(dbp, 16 * 1024)); + + BDBOP(__db_open(dbp, ip, NULL, dbfname, dbname, DB_BTREE, DB_CREATE, + 0666, PGNO_BASE_MD)); + + *dbpp = dbp; + + return (0); +err: + if (dbenv != NULL && ret != 0) + __db_err(dbenv->env, ret, "__lv_open_db"); + if (dbp != NULL) + (void)__db_close(dbp, NULL, 0); + + return (ret); +} + +/* Btree compare function for a [fileid, pgno] key. */ +static int +__lv_fidpgno_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + db_pgno_t pgno1, pgno2; + int ret; + size_t len; + + COMPQUIET(db, NULL); + len = DB_FILE_ID_LEN; + ret = memcmp(dbt1->data, dbt2->data, len); + if (ret == 0) { + memcpy(&pgno1, (u_int8_t *)dbt1->data + len, + sizeof(pgno1)); + memcpy(&pgno2, (u_int8_t *)dbt2->data + len, + sizeof(pgno2)); + ret = NUMCMP(pgno1, pgno2); + } + + return (ret); +} + +/* Btree compare function for a int32_t type of key. */ +static int +__lv_i32_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + int32_t k1, k2; + + COMPQUIET(db, NULL); + memcpy(&k1, dbt1->data, sizeof(k1)); + memcpy(&k2, dbt2->data, sizeof(k2)); + + return (NUMCMP(k1, k2)); +} + +/* Btree compare function for a u_int32_t type of key. 
*/ +static int +__lv_ui32_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + u_int32_t k1, k2; + + COMPQUIET(db, NULL); + memcpy(&k1, dbt1->data, sizeof(k1)); + memcpy(&k2, dbt2->data, sizeof(k2)); + + return (NUMCMP(k1, k2)); +} + +/* Btree compare function for a DB_LSN type of key. */ +static int +__lv_lsn_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + DB_LSN lsn1, lsn2; + + DB_ASSERT(db->env, dbt1->size == sizeof(DB_LSN)); + DB_ASSERT(db->env, dbt2->size == sizeof(DB_LSN)); + memcpy(&lsn1, dbt1->data, sizeof(DB_LSN)); + memcpy(&lsn2, dbt2->data, sizeof(DB_LSN)); + + return (LOG_COMPARE(&lsn1, &lsn2)); +} + +/* + * Structure management routines. We keep each structure on a + * consecutive memory chunk. + * + * The get functions will allocate memory via __os_malloc, and callers + * should free the memory after use. The update functions for VRFY_TXN_INFO + * and VRFY_FILEREG_INFO may realloc the structure. + */ + +/* + * PUBLIC: int __put_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_TXN_INFO *)); + */ +int +__put_txn_vrfy_info (lvinfo, txninfop) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_TXN_INFO *txninfop; +{ + int ret; + DBT key, data; + + ret = __lv_pack_txn_vrfy_info(txninfop, &key, &data); + DB_ASSERT(lvinfo->dbenv->env, ret == 0); + + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->txninfo, lvinfo->ip, NULL, + &key, &data, 0), "__put_txn_vrfy_info"); + __os_free(lvinfo->dbenv->env, data.data); + + return (0); +} + +/* Construct a key and data DBT from the structure. 
*/ +static int +__lv_pack_txn_vrfy_info(txninfop, key, data) + const VRFY_TXN_INFO *txninfop; + DBT *key, *data; +{ + int ret; + char *buf, *p; + size_t bufsz, len; + u_int32_t i; + DBT *pdbt; + + memset(key, 0, sizeof(DBT)); + memset(data, 0, sizeof(DBT)); + ret = 0; + bufsz = TXN_VERIFY_INFO_TOTSIZE(*txninfop); + + if ((ret = __os_malloc(NULL, bufsz, &buf)) != 0) + goto err; + memset(buf, 0, bufsz); + memcpy(buf, txninfop, TXN_VERIFY_INFO_FIXSIZE); + p = buf + TXN_VERIFY_INFO_FIXSIZE; + memcpy(p, txninfop->recycle_lsns, len = sizeof(DB_LSN) * + txninfop->num_recycle); + p += len; + + for (i = 0; i < txninfop->filenum; i++) { + + pdbt = &(txninfop->fileups[i]); + memcpy(p, &(pdbt->size), sizeof(pdbt->size)); + p += sizeof(pdbt->size); + memcpy(p, pdbt->data, pdbt->size); + p += pdbt->size; + } + + key->data = (void *)&txninfop->txnid; + key->size = sizeof(txninfop->txnid); + data->data = buf; + data->size = (u_int32_t)bufsz; + data->flags |= DB_DBT_MALLOC; +err: + return (ret); +} + +/* Calculate a DBT array's total number of bytes to store. */ +static size_t +__lv_dbt_arrsz(arr, arrlen) + const DBT *arr; + u_int32_t arrlen; +{ + u_int32_t i; + size_t sz; + + sz = 0; + + /* For each DBT object, store its size and its data bytes. */ + for (i = 0; i < arrlen; i++) + sz += arr[i].size + sizeof(arr[i].size); + + return sz; +} + +/* + * __get_txn_vrfy_info -- + * Get a VRFY_TXN_INFO object from db by txnid. Callers should free the + * object by calling __free_txninfo. 
+ * + * PUBLIC: int __get_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *, u_int32_t, + * PUBLIC: VRFY_TXN_INFO **)); + */ +int +__get_txn_vrfy_info (lvinfo, txnid, txninfopp) + const DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid; + VRFY_TXN_INFO **txninfopp; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->txninfo, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_txn_vrfy_info"); + + if (ret != DB_NOTFOUND) + ret = __lv_unpack_txn_vrfy_info(txninfopp, &data); + + return (ret); +} + +/* Construct a structure from a DBT. */ +static int +__lv_unpack_txn_vrfy_info(txninfopp, data) + VRFY_TXN_INFO **txninfopp; + const DBT *data; +{ + size_t bufsz; + VRFY_TXN_INFO *buf, *txninfop; + DB_LSN *lsns, *p; + u_int32_t i, sz; + char *pb, *q; + int ret; + + ret = 0; + i = sz = 0; + lsns = p = NULL; + pb = q = NULL; + txninfop = (VRFY_TXN_INFO *)data->data; + lsns = (DB_LSN *)((char *)data->data + TXN_VERIFY_INFO_FIXSIZE); + pb = (char *)lsns + txninfop->num_recycle * sizeof(DB_LSN); + + if ((ret = __os_malloc(NULL, bufsz = sizeof(VRFY_TXN_INFO), &buf)) != 0) + goto err; + memset(buf, 0, bufsz); + memcpy(buf, data->data, TXN_VERIFY_INFO_FIXSIZE); + + if (txninfop->num_recycle != 0) { + if ((ret = __os_malloc(NULL, + txninfop->num_recycle * sizeof(DB_LSN), &p)) != 0) + goto err; + memcpy(p, lsns, txninfop->num_recycle * sizeof(DB_LSN)); + buf->recycle_lsns = p; + } + + if (txninfop->filenum != 0) { + if ((ret = __os_malloc(NULL, + txninfop->filenum * sizeof(DBT), &q)) != 0) + goto err; + memset(q, 0, txninfop->filenum * sizeof(DBT)); + buf->fileups = (DBT *)q; + for (i = 0; i < txninfop->filenum; i++) { + memcpy(&sz, pb, sizeof(sz)); + pb += sizeof(sz); + if ((ret = __os_malloc(NULL, sz, &q)) != 0) + goto err; + memcpy(q, pb, sz); + pb += sz; + + buf->fileups[i].data = q; + buf->fileups[i].size = sz; + } + } + + *txninfopp = buf; +err: + 
return (ret); +} + +static int +__lv_add_recycle_lsn (txninfop, lsn) + VRFY_TXN_INFO *txninfop; + const DB_LSN *lsn; +{ + int ret; + + ret = 0; + txninfop->num_recycle++; + if ((ret = __os_realloc(NULL, txninfop->num_recycle * sizeof(DB_LSN), + &(txninfop->recycle_lsns))) != 0) + goto err; + txninfop->recycle_lsns[txninfop->num_recycle - 1] = *lsn; +err: + return (ret); +} + +/* + * __add_recycle_lsn_range -- + * Add recycle info for each txn within the recycled txnid range. + * + * PUBLIC: int __add_recycle_lsn_range __P((DB_LOG_VRFY_INFO *, + * PUBLIC: const DB_LSN *, u_int32_t, u_int32_t)); + */ +int +__add_recycle_lsn_range(lvinfo, lsn, min, max) + DB_LOG_VRFY_INFO *lvinfo; + const DB_LSN *lsn; + u_int32_t min, max; +{ + DBC *csr; + int ret, tret; + u_int32_t i; + DBT key2, data2; + struct __add_recycle_params param; + + csr = NULL; + ret = tret = 0; + memset(&key2, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + memset(¶m, 0, sizeof(param)); + + if ((ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_TXN_INFO *) * + (param.ti2ul = 1024), &(param.ti2u))) != 0) + goto err; + param.ti2ui = 0; + param.recycle_lsn = *lsn; + param.min = min; + param.max = max; + + /* Iterate the specified range and process each transaction. */ + if ((ret = __iterate_txninfo(lvinfo, min, max, __lv_add_recycle_handler, + ¶m)) != 0) + goto err; + + /* + * Save updated txninfo structures. We can't do so in the above + * iteration, so we have to save them here. + */ + BDBOP(__db_cursor(lvinfo->txninfo, lvinfo->ip, NULL, &csr, DBC_BULK)); + + for (i = 0; i < param.ti2ui; i++) { + ret = __lv_pack_txn_vrfy_info(param.ti2u[i], &key2, &data2); + DB_ASSERT(lvinfo->dbenv->env, ret == 0); + BDBOP(__dbc_put(csr, &key2, &data2, DB_KEYLAST)); + /* + * key2.data refers to param.ti2u[i]'s memory, data2.data is + * freed by DB since we set DB_DBT_MALLOC. 
+ */ + if ((ret = __free_txninfo(param.ti2u[i])) != 0) + goto err; + } + +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + __os_free(lvinfo->dbenv->env, param.ti2u); + if (ret != 0) + __db_err(lvinfo->dbenv->env, ret, + "__add_recycle_lsn_range"); + + return (ret); +} + +/* + * __iterate_txninfo -- + * Iterate throught the transaction info database as fast as possible, + * and process each key/data pair using a callback handler. Break the + * iteration if the handler returns non-zero values. + * + * PUBLIC: int __iterate_txninfo __P((DB_LOG_VRFY_INFO *, u_int32_t, + * PUBLIC: u_int32_t, TXNINFO_HANDLER, void *)); + */ +int +__iterate_txninfo(lvinfo, min, max, handler, param) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t min, max; + TXNINFO_HANDLER handler; + void *param; +{ + ENV *env; + VRFY_TXN_INFO *txninfop; + int ret, tret; + u_int32_t bufsz, pgsz, txnid; + size_t retkl, retdl; + char *btbuf; + u_int8_t *retk, *retd; + DBT key, data, data2; + DBC *csr; + void *p; + + csr = NULL; + env = lvinfo->dbenv->env; + txninfop = NULL; + ret = tret = 0; + txnid = 0; + retkl = retdl = 0; + bufsz = 64 * 1024; + btbuf = NULL; + retk = retd = NULL; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + + pgsz = lvinfo->txninfo->pgsize; + DB_ASSERT(env, ret == 0); + + if (bufsz % pgsz != 0) + bufsz = pgsz * (bufsz / pgsz); + + if ((ret = __os_malloc(env, bufsz, &btbuf)) != 0) + goto err; + + BDBOP(__db_cursor(lvinfo->txninfo, lvinfo->ip, NULL, &csr, DBC_BULK)); + + /* + * Use bulk retrieval to scan the database as fast as possible. + */ + data.data = btbuf; + data.ulen = bufsz; + data.flags |= DB_DBT_USERMEM; + + for (ret = __dbc_get(csr, &key, &data, DB_FIRST | DB_MULTIPLE_KEY) ;; + ret = __dbc_get(csr, &key, &data, DB_NEXT | DB_MULTIPLE_KEY)) { + switch (ret) { + case 0: + break; + case DB_NOTFOUND: + goto out; + /* No break statement allowed by lint here. 
*/ + case DB_BUFFER_SMALL: + if ((ret = __os_realloc(lvinfo->dbenv->env, + bufsz *= 2, &btbuf)) != 0) + goto out; + data.ulen = bufsz; + data.data = btbuf; + continue;/* Continue the for-loop. */ + /* No break statement allowed by lint here. */ + default: + goto err; + } + + /* + * Do bulk get. Some txninfo objects may be updated by the + * handler, but we can't store them immediately in the same + * loop because we wouldn't be able to continue the bulk get + * using the same cursor; and we can't use another cursor + * otherwise we may self-block. In the handler we need to + * store the updated objects and store them to db when we get + * out of this loop. + */ + DB_MULTIPLE_INIT(p, &data); + while (1) { + DB_MULTIPLE_KEY_NEXT(p, &data, + retk, retkl, retd, retdl); + if (p == NULL) + break; + DB_ASSERT(env, retkl == sizeof(txnid) && retk != NULL); + memcpy(&txnid, retk, retkl); + /* + * Process it if txnid in range or no range specified. + * The range must be a closed one. + */ + if ((min != 0 && txnid >= min && max != 0 && + txnid <= max) || (min == 0 && max == 0)) { + data2.data = retd; + data2.size = (u_int32_t)retdl; + + if ((ret = __lv_unpack_txn_vrfy_info( + &txninfop, &data2)) != 0) + goto out; + if ((ret = handler(lvinfo, txninfop, + param)) != 0) + /* Stop the iteration on error. */ + goto out; + } + } + + } +out: + if (ret == DB_NOTFOUND) + ret = 0; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + __os_free(lvinfo->dbenv->env, btbuf); + return (ret); +} + +/* Txninfo iteration handler to add recycle info for affected txns. */ +static int +__lv_add_recycle_handler(lvinfo, txninfop, params) + DB_LOG_VRFY_INFO *lvinfo; + VRFY_TXN_INFO *txninfop; + void *params; +{ + int ret; + struct __add_recycle_params *param; + + ret = 0; + param = (struct __add_recycle_params *)params; + + /* + * If the txnid is reused, update its recycle info and note it for + * later update, otherwise free the txninfop structure. 
+ */ + if (txninfop->txnid < param->min && txninfop->txnid > param->max) { + ret = __free_txninfo(txninfop); + return (ret); + } + + ret = __lv_add_recycle_lsn(txninfop, &(param->recycle_lsn)); + + if (ret != 0) + goto err; + /* + * Below is one way to tell if a txn is aborted without doing another + * backward pass of the log. However if the txn id is not in the + * chosen recycled txn id range, we can't tell, until all the log + * records are passed --- the remaining active txns are the aborted + * txns. + * No longer needed since we did another backward pass of the log + * and have all the txn lifetimes. + if (txninfop->status == TXN_STAT_ACTIVE) + __on_txn_abort(lvinfo, txninfop); + */ + if (txninfop->status == TXN_STAT_PREPARE) { + __db_errx(lvinfo->dbenv->env, + "[ERROR] Transaction with ID %u is prepared and not " + "committed, but its ID is recycled by log record [%u, %u].", + txninfop->txnid, param->recycle_lsn.file, + param->recycle_lsn.offset); + } + /* Note down to store later. */ + param->ti2u[(param->ti2ui)++] = txninfop; + if (param->ti2ui == param->ti2ul) + BDBOP(__os_realloc(lvinfo->dbenv->env, + sizeof(VRFY_TXN_INFO *) * (param->ti2ul *= 2), + &(param->ti2u))); +err: + return (ret); + +} +/* + * PUBLIC: int __rem_last_recycle_lsn __P((VRFY_TXN_INFO *)); + */ +int +__rem_last_recycle_lsn(txninfop) + VRFY_TXN_INFO *txninfop; +{ + int ret; + + ret = 0; + if (txninfop->num_recycle == 0) + return (0); + txninfop->num_recycle--; + if (txninfop->num_recycle > 0) + BDBOP(__os_realloc(NULL, txninfop->num_recycle * sizeof(DB_LSN), + &(txninfop->recycle_lsns))); + else { + __os_free(NULL, txninfop->recycle_lsns); + txninfop->recycle_lsns = NULL; + } +err: + return (ret); + +} + +/* + * __add_file_updated -- + * Add a file's dbregid and uid to the updating txn if it's not yet + * recorded. 
+ * + * PUBLIC: int __add_file_updated __P((VRFY_TXN_INFO *, const DBT *, int32_t)); + */ +int +__add_file_updated (txninfop, fileid, dbregid) + VRFY_TXN_INFO *txninfop; + const DBT *fileid; + int32_t dbregid; +{ + int ret; + DBT *pdbt, *p; + u_int32_t found, i; + + ret = 0; + p = pdbt = NULL; + + for (found = 0, i = 0; i < txninfop->filenum; i++) { + p = &(txninfop->fileups[i]); + if (p->size == fileid->size && + memcmp(p->data, fileid->data, p->size) == 0) { + found = 1; + break; + } + } + + if (found) + return (0); + + /* Add file's uid into the array, deep copy from fileid. */ + txninfop->filenum++; + if ((ret = __os_realloc(NULL, txninfop->filenum * + sizeof(DBT), &(txninfop->fileups))) != 0) + goto err; + + pdbt = &(txninfop->fileups[txninfop->filenum - 1]); + memset(pdbt, 0, sizeof(DBT)); + if ((ret = __os_malloc(NULL, + pdbt->size = fileid->size, &(pdbt->data))) != 0) + goto err; + memcpy(pdbt->data, fileid->data, fileid->size); + + /* Add file dbregid into the array. */ + BDBOP(__os_realloc(NULL, txninfop->filenum * + sizeof(int32_t), &(txninfop->dbregid))); + txninfop->dbregid[txninfop->filenum - 1] = dbregid; +err: + return (ret); +} + +/* + * PUBLIC: int __del_file_updated __P((VRFY_TXN_INFO *, const DBT *)); + */ +int +__del_file_updated (txninfop, fileid) + VRFY_TXN_INFO *txninfop; + const DBT *fileid; +{ + u_int32_t found, i; + int ret; + DBT *p; + void *pdbtdata; + + ret = 0; + + if (txninfop->filenum == 0) + return (0); + + /* + * If the array has an element identical to fileid, remove it. fileid + * itself is intact after this function call. 
+ */ + for (found = 0, i = 0, pdbtdata = NULL; i < txninfop->filenum; i++) { + p = &(txninfop->fileups[i]); + if (p->size == fileid->size && + memcmp(p->data, fileid->data, p->size) == 0) { + pdbtdata = p->data; + if (txninfop->filenum > 1) { + memmove(txninfop->fileups + i, txninfop-> + fileups + i + 1, sizeof(DBT) * (txninfop-> + filenum - (i + 1))); + memmove(txninfop->dbregid + i, txninfop-> + dbregid + i + 1, sizeof(int32_t) * + (txninfop->filenum - (i + 1))); + } else { + __os_free(NULL, txninfop->fileups); + __os_free(NULL, txninfop->dbregid); + txninfop->fileups = NULL; + txninfop->dbregid = NULL; + } + found = 1; + break; + } + } + + if (found) { + txninfop->filenum--; + if (txninfop->filenum) { + BDBOP(__os_realloc(NULL, sizeof(DBT) * + txninfop->filenum, &(txninfop->fileups))); + BDBOP(__os_realloc(NULL, sizeof(int32_t) * + txninfop->filenum, &(txninfop->dbregid))); + } + __os_free(NULL, pdbtdata); + } +err: + return (ret); +} + +/* + * PUBLIC: int __clear_fileups __P((VRFY_TXN_INFO *)); + */ +int +__clear_fileups(txninfop) + VRFY_TXN_INFO *txninfop; +{ + u_int32_t i; + + for (i = 0; i < txninfop->filenum; i++) + __os_free(NULL, txninfop->fileups[i].data); + + __os_free(NULL, txninfop->fileups); + __os_free(NULL, txninfop->dbregid); + txninfop->fileups = NULL; + txninfop->dbregid = NULL; + txninfop->filenum = 0; + + return (0); +} + +/* + * __free_txninfo_stack -- + * The object is on stack, only free its internal memory, not itself. 
+ * PUBLIC: int __free_txninfo_stack __P((VRFY_TXN_INFO *)); + */ +int +__free_txninfo_stack (p) + VRFY_TXN_INFO *p; +{ + u_int32_t i; + + if (p == NULL) + return (0); + + if (p->fileups != NULL) { + for (i = 0; i < p->filenum; i++) + __os_free(NULL, p->fileups[i].data); + __os_free(NULL, p->fileups); + } + + if (p->dbregid != NULL) + __os_free(NULL, p->dbregid); + + if (p->recycle_lsns != NULL) + __os_free(NULL, p->recycle_lsns); + + return (0); +} +/* + * PUBLIC: int __free_txninfo __P((VRFY_TXN_INFO *)); + */ +int +__free_txninfo(p) + VRFY_TXN_INFO *p; +{ + (void)__free_txninfo_stack(p); + __os_free(NULL, p); + + return (0); +} + +/* Construct a key and data DBT from the structure. */ +static int +__lv_pack_filereg(freginfo, data) + const VRFY_FILEREG_INFO *freginfo; + DBT *data; +{ + char *buf, *p; + size_t bufsz, offset; + int ret; + + ret = 0; + if ((ret = __os_malloc(NULL, + bufsz = FILE_REG_INFO_TOTSIZE(*freginfo), &buf)) != 0) + goto err; + memset(buf, 0, bufsz); + + memcpy(buf, freginfo, FILE_REG_INFO_FIXSIZE); + p = buf + FILE_REG_INFO_FIXSIZE; + + offset = sizeof(int32_t) * freginfo->regcnt; + memcpy(p, freginfo->dbregids, offset); + p += offset; + + memcpy(p, &(freginfo->fileid.size), sizeof(freginfo->fileid.size)); + p += sizeof(freginfo->fileid.size); + memcpy(p, freginfo->fileid.data, freginfo->fileid.size); + p += freginfo->fileid.size; + (void)strcpy(p, freginfo->fname); + + data->data = buf; + data->size = (u_int32_t)bufsz; +err: + return (ret); +} + +/* + * PUBLIC: int __put_filereg_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_FILEREG_INFO *)); + */ +int __put_filereg_info (lvinfo, freginfo) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_FILEREG_INFO *freginfo; +{ + + int ret; + DBT data; + + memset(&data, 0, sizeof(DBT)); + + if ((ret = __lv_pack_filereg(freginfo, &data)) != 0) + goto err; + + /* + * We store dbregid-filereg map into dbregids.db, but we can't make + * dbregids.db the sec db of fileregs.db, because dbregid is only 
+ * valid when a db file is open, we want to delete data with same + * key in dbregids.db, but we want to keep all filereg_info data in + * fileregs.db to track all db file lifetime and status. + * + * Consequently we will store dbregid-file_uid in dbregs.db, so that we + * can delete dbregid when the db handle is closed, and we can + * use the dbregid to get the currently open db file's uid. + */ + + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->fileregs, lvinfo->ip, NULL, + (DBT *)&(freginfo->fileid), &data, 0), "__put_filereg_info"); + +err: + if (data.data != NULL) + __os_free(lvinfo->dbenv->env, data.data); + + return (ret); +} + +/* + * PUBLIC: int __del_filelife __P((const DB_LOG_VRFY_INFO *, int32_t)); + */ +int +__del_filelife(lvinfo, dbregid) + const DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; +{ + int ret; + DBT key; + + memset(&key, 0, sizeof(DBT)); + key.data = &(dbregid); + key.size = sizeof(dbregid); + + if ((ret = __db_del(lvinfo->dbregids, lvinfo->ip, NULL, + &key, 0)) != 0) + goto err; + +err: + return (ret); +} + +/* + * PUBLIC: int __put_filelife __P((const DB_LOG_VRFY_INFO *, VRFY_FILELIFE *)); + */ +int +__put_filelife (lvinfo, pflife) + const DB_LOG_VRFY_INFO *lvinfo; + VRFY_FILELIFE *pflife; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &(pflife->dbregid); + key.size = sizeof(pflife->dbregid); + data.data = pflife; + data.size = sizeof(VRFY_FILELIFE); + + if ((ret = __db_put(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0)) != 0) + goto err; + +err: + return (ret); +} + +/* + * PUBLIC: int __get_filelife __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: int32_t, VRFY_FILELIFE **)); + */ +int +__get_filelife (lvinfo, dbregid, flifepp) + const DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; + VRFY_FILELIFE **flifepp; +{ + int ret; + DBT key, data; + VRFY_FILELIFE *flifep; + + ret = 0; + flifep = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &dbregid; 
+ key.size = sizeof(dbregid); + if ((ret = __db_get(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0)) != 0) + goto err; + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_FILELIFE), &flifep)) != 0) + goto err; + DB_ASSERT(lvinfo->dbenv->env, flifep != NULL); + memcpy(flifep, data.data, sizeof(VRFY_FILELIFE)); + *flifepp = flifep; +err: + return (ret); +} + +/* + * PUBLIC: int __get_filereg_by_dbregid __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: int32_t, VRFY_FILEREG_INFO **)); + */ +int +__get_filereg_by_dbregid(lvinfo, dbregid, freginfopp) + const DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; + VRFY_FILEREG_INFO **freginfopp; +{ + int ret; + DBT key, data; + char uid[DB_FILE_ID_LEN]; + VRFY_FILELIFE *pflife; + + memset(&data, 0, sizeof(DBT)); + memset(&key, 0, sizeof(DBT)); + key.data = &dbregid; + key.size = sizeof(dbregid); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_filereg_by_dbregid"); + if (ret == DB_NOTFOUND) + goto err; + + /* Use the file-uid as key to retrieve from fileregs.db. 
*/ + pflife = (VRFY_FILELIFE *)data.data; + memcpy((void *)uid, (void *)pflife->fileid, key.size = DB_FILE_ID_LEN); + + key.data = (void *)uid; + memset(&data, 0, sizeof(DBT)); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->fileregs, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_filereg_by_dbregid"); + if (ret == DB_NOTFOUND) + goto err; + if ((ret = __lv_unpack_filereg(&data, freginfopp)) != 0) + goto err; + +err: + return (ret); +} + +/* + * PUBLIC: int __add_dbregid __P((DB_LOG_VRFY_INFO *, VRFY_FILEREG_INFO *, + * PUBLIC: int32_t, u_int32_t, DB_LSN, DBTYPE, db_pgno_t, int *)); + */ +int +__add_dbregid(lvh, freg, dbregid, opcode, lsn, dbtype, meta_pgno, addp) + DB_LOG_VRFY_INFO *lvh; + VRFY_FILEREG_INFO *freg; + int32_t dbregid; + u_int32_t opcode; + DB_LSN lsn; + DBTYPE dbtype; + db_pgno_t meta_pgno; + int *addp; +{ + int inarray, ret, tret; + u_int32_t i, j; + VRFY_FILELIFE flife; + + inarray = ret = tret = 0; + for (i = 0; i < freg->regcnt; i++) { + if (freg->dbregids[i] == dbregid) { + if (!IS_DBREG_CLOSE(opcode)) { + /* Opening an open dbreg id. */ + if (IS_DBREG_OPEN(opcode) && + (opcode != DBREG_CHKPNT && + opcode != DBREG_XCHKPNT)) { + tret = 2; + goto err; + } + tret = 0; + inarray = 1; + } else + /* Found the dbregid; gonna remove it. */ + tret = -1; + break; + } + } + + if (IS_DBREG_OPEN(opcode)) + tret = 1;/* dbregid not in the array, gonna add 1. */ + + /* + * Remove closed dbregid. dbregid can be recycled, not unique to a db + * file, it's dynamically allocated for each db handle. + */ + if (tret == -1) { + for (j = i; j < freg->regcnt - 1; j++) + freg->dbregids[j] = freg->dbregids[j + 1]; + freg->regcnt--; + BDBOP(__os_realloc(lvh->dbenv->env, + sizeof(int32_t) * freg->regcnt, &(freg->dbregids))); + /* Don't remove dbregid life info from dbregids db. 
*/ + } else if (tret == 1) { + if (!inarray) { + freg->regcnt++; + BDBOP(__os_realloc(lvh->dbenv->env, + sizeof(int32_t) * freg->regcnt, &(freg->dbregids))); + freg->dbregids[freg->regcnt - 1] = dbregid; + } + flife.dbregid = dbregid; + memcpy(flife.fileid, freg->fileid.data, freg->fileid.size); + flife.lifetime = opcode; + flife.dbtype = dbtype; + flife.lsn = lsn; + flife.meta_pgno = meta_pgno; + if ((ret = __put_filelife(lvh, &flife)) != 0) + goto err; + } + +err: + *addp = tret; + return (ret); + +} + +/* + * PUBLIC: int __get_filereg_info __P((const DB_LOG_VRFY_INFO *, const DBT *, + * PUBLIC: VRFY_FILEREG_INFO **)); + */ +int +__get_filereg_info (lvinfo, fuid, freginfopp) + const DB_LOG_VRFY_INFO *lvinfo; + const DBT *fuid; + VRFY_FILEREG_INFO **freginfopp; +{ + int ret; + DBT data; + + memset(&data, 0, sizeof(DBT)); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->fileregs, lvinfo->ip, NULL, + (DBT *)fuid, &data, 0), DB_NOTFOUND, "__get_filereg_info"); + if (ret == DB_NOTFOUND) + goto err; + if ((ret = __lv_unpack_filereg(&data, freginfopp)) != 0) + goto err; + +err: + return (ret); +} + +static int +__lv_unpack_filereg(data, freginfopp) + const DBT *data; + VRFY_FILEREG_INFO **freginfopp; +{ + char *p, *q; + u_int32_t fidsz, arrsz; + VRFY_FILEREG_INFO *buf; + int ret; + + ret = 0; + p = q = NULL; + fidsz = arrsz = 0; + buf = NULL; + + if ((ret = __os_malloc(NULL, sizeof(VRFY_FILEREG_INFO), &buf)) != 0) + goto err; + memset(buf, 0, sizeof(VRFY_FILEREG_INFO)); + + memcpy(buf, data->data, FILE_REG_INFO_FIXSIZE); + *freginfopp = (VRFY_FILEREG_INFO *)buf; + p = ((char *)(data->data)) + FILE_REG_INFO_FIXSIZE; + + if ((ret = __os_malloc(NULL, arrsz = (*freginfopp)->regcnt * + sizeof(int32_t), &((*freginfopp)->dbregids))) != 0) + goto err; + memcpy((*freginfopp)->dbregids, p, arrsz); + p += arrsz; + + memcpy(&fidsz, p, sizeof(fidsz)); + p += sizeof(fidsz); + if ((ret = __os_malloc(NULL, fidsz, &q)) != 0) + goto err; + memcpy(q, p, fidsz); + (*freginfopp)->fileid.data = q; 
+ (*freginfopp)->fileid.size = fidsz; + p += fidsz; + + if ((ret = __os_malloc(NULL, sizeof(char) * (strlen(p) + 1), &q)) != 0) + goto err; + (void)strcpy(q, p); + + (*freginfopp)->fname = q; +err: + return (ret); +} + +/* + * PUBLIC: int __free_filereg_info __P((VRFY_FILEREG_INFO *)); + */ +int +__free_filereg_info(p) + VRFY_FILEREG_INFO *p; +{ + if (p == NULL) + return (0); + if (p ->fname != NULL) + __os_free(NULL, (void *)(p->fname)); + if (p->fileid.data != NULL) + __os_free(NULL, p->fileid.data); + if (p->dbregids != NULL) + __os_free(NULL, p->dbregids); + __os_free(NULL, p); + + return (0); +} + +/* + * PUBLIC: int __get_ckp_info __P((const DB_LOG_VRFY_INFO *, DB_LSN, + * PUBLIC: VRFY_CKP_INFO **)); + */ +int +__get_ckp_info (lvinfo, lsn, ckpinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + VRFY_CKP_INFO **ckpinfopp; +{ + int ret; + DBT key, data; + VRFY_CKP_INFO *ckpinfo; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &lsn; + key.size = sizeof(DB_LSN); + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->ckps, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_ckp_info"); + + if (ret == DB_NOTFOUND) + goto err; + + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_CKP_INFO), &ckpinfo)) != 0) + goto err; + memcpy(ckpinfo, data.data, sizeof(VRFY_CKP_INFO)); + *ckpinfopp = ckpinfo; +err: + return (ret); + +} + +/* + * PUBLIC: int __get_last_ckp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: VRFY_CKP_INFO **)); + */ +int +__get_last_ckp_info (lvinfo, ckpinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + VRFY_CKP_INFO **ckpinfopp; +{ + int ret, tret; + DBT key, data; + VRFY_CKP_INFO *ckpinfo; + DBC *csr; + + csr = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + BDBOP(__db_cursor(lvinfo->ckps, lvinfo->ip, NULL, &csr, 0)); + if ((ret = __dbc_get(csr, &key, &data, DB_LAST)) != 0) + goto err; + + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_CKP_INFO), &ckpinfo)) != 0) + goto err; + 
DB_ASSERT(lvinfo->dbenv->env, sizeof(VRFY_CKP_INFO) == data.size); + memcpy(ckpinfo, data.data, sizeof(VRFY_CKP_INFO)); + *ckpinfopp = ckpinfo; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + if (ret != 0 && ret != DB_NOTFOUND) + __db_err(lvinfo->dbenv->env, ret, "__get_last_ckp_info"); + return (ret); +} + +/* + * PUBLIC: int __put_ckp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_CKP_INFO *)); + */ +int __put_ckp_info (lvinfo, ckpinfo) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_CKP_INFO *ckpinfo; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = (void *)&ckpinfo->lsn; + key.size = sizeof(DB_LSN); + data.data = (void *)ckpinfo; + data.size = sizeof(VRFY_CKP_INFO); + + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->ckps, lvinfo->ip, + NULL, &key, &data, 0), "__put_ckp_info"); + return (0); +} + +/* + * PUBLIC: int __get_timestamp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: DB_LSN, VRFY_TIMESTAMP_INFO **)); + */ +int __get_timestamp_info (lvinfo, lsn, tsinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + VRFY_TIMESTAMP_INFO **tsinfopp; +{ + int ret; + DBT key, data; + VRFY_TIMESTAMP_INFO *tsinfo; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &lsn; + key.size = sizeof(DB_LSN); + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->lsntime, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_timestamp_info"); + + if (ret == DB_NOTFOUND) + goto err; + + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_TIMESTAMP_INFO), &tsinfo)) != 0) + goto err; + + memcpy(tsinfo, data.data, sizeof(VRFY_TIMESTAMP_INFO)); + *tsinfopp = tsinfo; +err: + return (ret); +} + +/* + * __get_latest_timestamp_info -- + * Get latest timestamp info before lsn. 
+ * PUBLIC: int __get_latest_timestamp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: DB_LSN, VRFY_TIMESTAMP_INFO **)); + */ +int __get_latest_timestamp_info(lvinfo, lsn, tsinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + VRFY_TIMESTAMP_INFO **tsinfopp; +{ + int ret, tret; + DBT key, data; + VRFY_TIMESTAMP_INFO *tsinfo; + DBC *csr; + + csr = NULL; + ret = tret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &lsn; + key.size = sizeof(lsn); + BDBOP(__db_cursor(lvinfo->lsntime, lvinfo->ip, NULL, &csr, 0)); + + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + BDBOP(__dbc_get(csr, &key, &data, DB_PREV)); + + if ((ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_TIMESTAMP_INFO), + &tsinfo)) != 0) + goto err; + + memcpy(tsinfo, data.data, sizeof(VRFY_TIMESTAMP_INFO)); + *tsinfopp = tsinfo; + +err: + if (ret != 0 && ret != DB_NOTFOUND) + __db_err(lvinfo->dbenv->env, + ret, "__get_latest_timestamp_info"); + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __put_timestamp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_TIMESTAMP_INFO *)); + */ +int __put_timestamp_info (lvinfo, tsinfo) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_TIMESTAMP_INFO *tsinfo; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = (void *)&(tsinfo->lsn); + key.size = sizeof(DB_LSN); + data.data = (void *)tsinfo; + data.size = sizeof(VRFY_TIMESTAMP_INFO); + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->lsntime, lvinfo->ip, NULL, + &key, &data, 0), "__put_timestamp_info"); + + return (0); +} + +static int +__lv_txnrgns_lsn_cmp (db, d1, d2) + DB *db; + const DBT *d1, *d2; +{ + struct __lv_txnrange r1, r2; + + DB_ASSERT(db->env, d1->size == sizeof(r1)); + DB_ASSERT(db->env, d2->size == sizeof(r2)); + memcpy(&r1, d1->data, d1->size); + memcpy(&r2, d2->data, d2->size); + + return (LOG_COMPARE(&(r1.end), &(r2.end))); +} + +/* + * 
__find_lsnrg_by_timerg -- + * Find the lsn closed interval [beginlsn, endlsn] so that the + * corresponding timestamp interval fully contains interval [begin, end]. + * PUBLIC: int __find_lsnrg_by_timerg __P((DB_LOG_VRFY_INFO *, + * PUBLIC: time_t, time_t, DB_LSN *, DB_LSN *)); + */ +int +__find_lsnrg_by_timerg(lvinfo, begin, end, startlsn, endlsn) + DB_LOG_VRFY_INFO *lvinfo; + time_t begin, end; + DB_LSN *startlsn, *endlsn; +{ + int ret, tret; + DBC *csr; + struct __lv_timestamp_info *t1, *t2; + DBT key, data; + + ret = tret = 0; + csr = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + BDBOP(__db_cursor(lvinfo->timelsn, lvinfo->ip, NULL, &csr, 0)); + + /* + * We want a lsn range that completely contains [begin, end], so + * try move 1 record prev when getting the startlsn. + */ + key.data = &begin; + key.size = sizeof(begin); + BDBOP(__dbc_get(csr, &key, &data, DB_SET_RANGE)); + if ((ret = __dbc_get(csr, &key, &data, DB_PREV)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND)/* begin is smaller than the smallest key. */ + startlsn->file = startlsn->offset = 0;/* beginning. */ + else { + t1 = (struct __lv_timestamp_info *)data.data; + *startlsn = t1->lsn; + } + + /* + * Move to the last key/data pair of the duplicate set to get the + * biggest lsn having end as timestamp. + */ + key.data = &end; + key.size = sizeof(end); + if ((ret = __dbc_get(csr, &key, &data, DB_SET_RANGE)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND) { + endlsn->file = endlsn->offset = (u_int32_t)-1;/* Biggest lsn. */ + ret = 0; + goto err; /* We are done. */ + } + + /* + * Go to the biggest lsn of the dup set, if the key is the last one, + * go to the last one. 
+ */ + if ((ret = __dbc_get(csr, &key, &data, DB_NEXT_NODUP)) != 0 && + ret != DB_NOTFOUND) + goto err; + + if (ret == DB_NOTFOUND) + BDBOP(__dbc_get(csr, &key, &data, DB_LAST)); + else + BDBOP(__dbc_get(csr, &key, &data, DB_PREV)); + + t2 = (struct __lv_timestamp_info *)data.data; + *endlsn = t2->lsn; +err: + if (ret == DB_NOTFOUND) + ret = 0; + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __add_txnrange __P((DB_LOG_VRFY_INFO *, u_int32_t, + * PUBLIC: DB_LSN, int32_t, int)); + */ +int __add_txnrange (lvinfo, txnid, lsn, when, ishead) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid; + DB_LSN lsn; + int32_t when; + int ishead; /* Whether it's the 1st log of the txn. */ +{ + int ret, tret; + DBC *csr; + struct __lv_txnrange tr, *ptr; + DBT key, data; + + csr = NULL; + ret = 0; + ptr = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&tr, 0, sizeof(tr)); + + key.data = &txnid; + key.size = sizeof(txnid); + tr.txnid = txnid; + BDBOP(__db_cursor(lvinfo->txnrngs, lvinfo->ip, NULL, &csr, 0)); + /* + * Note that we will backward play the logs to gather such information. + */ + if (!ishead) { + tr.end = lsn; + tr.when_commit = when; + data.data = &tr; + data.size = sizeof(tr); + BDBOP(__dbc_put(csr, &key, &data, DB_KEYFIRST)); + } else { + /* + * Dup data sorted by lsn, and we are backward playing logs, + * so the 1st record should be the one we want. + */ + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + ptr = (struct __lv_txnrange *)data.data; + DB_ASSERT(lvinfo->dbenv->env, IS_ZERO_LSN(ptr->begin)); + ptr->begin = lsn; + BDBOP(__dbc_put(csr, &key, &data, DB_CURRENT)); + } + +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * __get_aborttxn -- + * If lsn is the last log of an aborted txn T, T's txnid is + * returned via the log verify handle. 
+ * + * PUBLIC: int __get_aborttxn __P((DB_LOG_VRFY_INFO *, DB_LSN)); + */ +int +__get_aborttxn(lvinfo, lsn) + DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; +{ + int ret, tret; + u_int32_t txnid; + DBC *csr; + DBT key, data; + + csr = NULL; + txnid = 0; + ret = tret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &lsn; + key.size = sizeof(lsn); + BDBOP(__db_cursor(lvinfo->txnaborts, lvinfo->ip, NULL, &csr, 0)); + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + memcpy(&txnid, data.data, data.size); + /* + * The lsn is the last op of an aborted txn, call __on_txnabort + * before processing next log record. + */ + lvinfo->aborted_txnid = txnid; + lvinfo->aborted_txnlsn = lsn; + +err: + /* It's OK if can't find it. */ + if (ret == DB_NOTFOUND) + ret = 0; + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * __txn_started -- + * Whether txnid is started before lsn and ended after lsn. + * + * PUBLIC: int __txn_started __P((DB_LOG_VRFY_INFO *, + * PUBLIC: DB_LSN, u_int32_t, int *)); + */ +int +__txn_started(lvinfo, lsn, txnid, res) + DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + u_int32_t txnid; + int *res; +{ + int ret, tret; + DBC *csr; + DBT key, data; + struct __lv_txnrange *ptr, tr; + + ret = *res = 0; + csr = NULL; + memset(&tr, 0, sizeof(tr)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + + BDBOP(__db_cursor(lvinfo->txnrngs, lvinfo->ip, NULL, &csr, 0)); + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + for (;ret == 0; ret = __dbc_get(csr, &key, &data, DB_NEXT_DUP)) { + ptr = (struct __lv_txnrange *)data.data; + if (LOG_COMPARE(&lsn, &(ptr->begin)) > 0 && + LOG_COMPARE(&lsn, &(ptr->end)) <= 0) { + *res = 1; + break; + } + } +err: + if (ret == DB_NOTFOUND) + ret = 0;/* It's OK if can't find it. 
*/ + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __set_logvrfy_dbfuid __P((DB_LOG_VRFY_INFO *)); + */ +int +__set_logvrfy_dbfuid(lvinfo) + DB_LOG_VRFY_INFO *lvinfo; +{ + int ret; + const char *p; + DBT key, data; + size_t buflen; + + p = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* So far we only support verifying a specific db file. */ + p = lvinfo->lv_config->dbfile; + buflen = sizeof(char) * (strlen(p) + 1); + key.data = (char *)p; + key.size = (u_int32_t)buflen; + + BDBOP2(lvinfo->dbenv, __db_get(lvinfo->fnameuid, lvinfo->ip, NULL, + &key, &data, 0), "__set_logvrfy_dbfuid"); + + memcpy(lvinfo->target_dbid, data.data, DB_FILE_ID_LEN); + + return (ret); +} + +/* + * __add_page_to_txn -- + * Try adding a page to a txn, result brings back if really added(0/1) + * or if there is an access violation(-1). + * PUBLIC: int __add_page_to_txn __P((DB_LOG_VRFY_INFO *, + * PUBLIC: int32_t, db_pgno_t, u_int32_t, u_int32_t *, int *)); + */ +int +__add_page_to_txn (lvinfo, dbregid, pgno, txnid, otxn, result) + DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; + db_pgno_t pgno; + u_int32_t txnid, *otxn; + int *result; +{ + int ret; + u_int8_t *buf; + DBT key, data; + size_t buflen; + u_int32_t txnid2; + VRFY_FILELIFE *pff; + + if (txnid < TXN_MINIMUM) { + *result = 0; + return (0); + } + buf = NULL; + ret = 0; + txnid2 = 0; + pff = NULL; + buflen = sizeof(u_int8_t) * DB_FILE_ID_LEN + sizeof(db_pgno_t); + BDBOP(__os_malloc(lvinfo->dbenv->env, buflen, &buf)); + memset(buf, 0, buflen); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* + * We use the file uid as key because a single db file can have + * multiple dbregid at the same time, and we may neglect the fact + * that the same db file is being updated by multiple txns if we use + * dbregid as key. 
+ */ + key.data = &dbregid; + key.size = sizeof(dbregid); + if ((ret = __db_get(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0)) != 0) { + if (ret == DB_NOTFOUND) { + if (F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) { + ret = 0; + goto out; + } else + F_SET(lvinfo, DB_LOG_VERIFY_INTERR); + } + goto err; + } + pff = (VRFY_FILELIFE *)data.data; + memcpy(buf, pff->fileid, DB_FILE_ID_LEN); + memcpy(buf + DB_FILE_ID_LEN, (u_int8_t *)&pgno, sizeof(pgno)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = buf; + key.size = (u_int32_t)buflen; + if ((ret = __db_get(lvinfo->pgtxn, lvinfo->ip, NULL, + &key, &data, 0)) != 0) { + if (ret == DB_NOTFOUND) { + data.data = &txnid; + data.size = sizeof(txnid); + BDBOP(__db_put(lvinfo->pgtxn, lvinfo->ip, NULL, &key, + &data, 0)); + *result = 1; + ret = 0;/* This is not an error. */ + } + goto err; + } + DB_ASSERT(lvinfo->dbenv->env, data.size == sizeof(txnid2)); + memcpy(&txnid2, data.data, data.size); + if (txnid == txnid2)/* The same txn already has the page. */ + *result = 0; + else {/* Txn txnid is updating pages still held by txnid2. */ + *result = -1; + *otxn = txnid2; + } +out: + /* result is set to -1 on violation, 0 if already has it, 1 if added. */ +err: + if (buf != NULL) + __os_free(lvinfo->dbenv->env, buf); + return (ret); +} + +/* + * PUBLIC: int __del_txn_pages __P((DB_LOG_VRFY_INFO *, u_int32_t)); + */ +int +__del_txn_pages(lvinfo, txnid) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid; +{ + int ret; + DBT key; + + ret = 0; + memset(&key, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + + BDBOP(__db_del(lvinfo->txnpg, lvinfo->ip, NULL, &key, 0)); + +err: + return (ret); +} + +/* + * __is_ancestor_txn -- + * Tells via res if ptxnid is txnid's parent txn at the moment of lsn. 
+ * + * PUBLIC: int __is_ancestor_txn __P((DB_LOG_VRFY_INFO *, + * PUBLIC: u_int32_t, u_int32_t, DB_LSN, int *)); + */ +int +__is_ancestor_txn (lvinfo, ptxnid, txnid, lsn, res) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t ptxnid, txnid; + DB_LSN lsn; + int *res; +{ + u_int32_t ptid; + int ret, tret; + DBC *csr; + DB *pdb; + DBT key, data; + struct __lv_txnrange tr; + + ret = 0; + ptid = txnid; + csr = NULL; + pdb = lvinfo->txnrngs; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + *res = 0; + BDBOP(__db_cursor(pdb, lvinfo->ip, NULL, &csr, 0)); + + /* See if ptxnid is an ancestor of txnid. */ + do { + key.data = &ptid; + key.size = sizeof(ptid); + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + /* A txnid maybe reused, we want the range having lsn in it. */ + for (;ret == 0; + ret = __dbc_get(csr, &key, &data, DB_NEXT_DUP)) { + DB_ASSERT(pdb->env, sizeof(tr) == data.size); + memcpy(&tr, data.data, data.size); + if (tr.ptxnid > 0 && + LOG_COMPARE(&lsn, &(tr.begin)) >= 0 && + LOG_COMPARE(&lsn, &(tr.end)) <= 0) + break; + } + + if (tr.ptxnid == ptxnid) { + *res = 1; + goto out; + } else + ptid = tr.ptxnid; + + } while (ptid != 0); +out: + +err: + if (ret == DB_NOTFOUND) + ret = 0; + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __return_txn_pages __P((DB_LOG_VRFY_INFO *, + * PUBLIC: u_int32_t, u_int32_t)); + */ +int __return_txn_pages(lvh, ctxn, ptxn) + DB_LOG_VRFY_INFO *lvh; + u_int32_t ctxn, ptxn; +{ + int ret, tret; + DBC *csr; + DB *pdb, *sdb; + DBT key, key2, data, data2; + char buf[DB_FILE_ID_LEN + sizeof(db_pgno_t)]; + + ret = tret = 0; + csr = NULL; + sdb = lvh->txnpg; + pdb = lvh->pgtxn; + memset(&key, 0, sizeof(DBT)); + memset(&key2, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + + BDBOP(__db_cursor(sdb, lvh->ip, NULL, &csr, 0)); + key.data = &ctxn; + key.size = sizeof(ctxn); + key2.data = &ptxn; + key2.size = sizeof(ptxn); + data2.data = 
buf; + data2.ulen = DB_FILE_ID_LEN + sizeof(db_pgno_t); + data2.flags = DB_DBT_USERMEM; + + for (ret = __dbc_pget(csr, &key, &data2, &data, DB_SET); ret == 0; + ret = __dbc_pget(csr, &key, &data2, &data, DB_NEXT_DUP)) + BDBOP(__db_put(pdb, lvh->ip, NULL, &data2, &key2, 0)); + if ((ret = __del_txn_pages(lvh, ctxn)) != 0 && ret != DB_NOTFOUND) + goto err; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +#define ADD_ITEM(lvh, logtype) ((lvh)->logtype_names[(logtype)] = (#logtype)) +static void +__lv_setup_logtype_names(lvinfo) + DB_LOG_VRFY_INFO *lvinfo; +{ + ADD_ITEM(lvinfo, DB___bam_irep); + ADD_ITEM(lvinfo, DB___bam_split_42); + ADD_ITEM(lvinfo, DB___bam_split); + ADD_ITEM(lvinfo, DB___bam_rsplit); + ADD_ITEM(lvinfo, DB___bam_adj); + ADD_ITEM(lvinfo, DB___bam_cadjust); + ADD_ITEM(lvinfo, DB___bam_cdel); + ADD_ITEM(lvinfo, DB___bam_repl); + ADD_ITEM(lvinfo, DB___bam_root); + ADD_ITEM(lvinfo, DB___bam_curadj); + ADD_ITEM(lvinfo, DB___bam_rcuradj); + ADD_ITEM(lvinfo, DB___bam_relink_43); + ADD_ITEM(lvinfo, DB___bam_merge_44); + ADD_ITEM(lvinfo, DB___crdel_metasub); + ADD_ITEM(lvinfo, DB___crdel_inmem_create); + ADD_ITEM(lvinfo, DB___crdel_inmem_rename); + ADD_ITEM(lvinfo, DB___crdel_inmem_remove); + ADD_ITEM(lvinfo, DB___dbreg_register); + ADD_ITEM(lvinfo, DB___db_addrem); + ADD_ITEM(lvinfo, DB___db_big); + ADD_ITEM(lvinfo, DB___db_ovref); + ADD_ITEM(lvinfo, DB___db_relink_42); + ADD_ITEM(lvinfo, DB___db_debug); + ADD_ITEM(lvinfo, DB___db_noop); + ADD_ITEM(lvinfo, DB___db_pg_alloc_42); + ADD_ITEM(lvinfo, DB___db_pg_alloc); + ADD_ITEM(lvinfo, DB___db_pg_free_42); + ADD_ITEM(lvinfo, DB___db_pg_free); + ADD_ITEM(lvinfo, DB___db_cksum); + ADD_ITEM(lvinfo, DB___db_pg_freedata_42); + ADD_ITEM(lvinfo, DB___db_pg_freedata); + ADD_ITEM(lvinfo, DB___db_pg_init); + ADD_ITEM(lvinfo, DB___db_pg_sort_44); + ADD_ITEM(lvinfo, DB___db_pg_trunc); + ADD_ITEM(lvinfo, DB___db_realloc); + ADD_ITEM(lvinfo, DB___db_relink); + 
ADD_ITEM(lvinfo, DB___db_merge); + ADD_ITEM(lvinfo, DB___db_pgno); +#ifdef HAVE_HASH + ADD_ITEM(lvinfo, DB___ham_insdel); + ADD_ITEM(lvinfo, DB___ham_newpage); + ADD_ITEM(lvinfo, DB___ham_splitdata); + ADD_ITEM(lvinfo, DB___ham_replace); + ADD_ITEM(lvinfo, DB___ham_copypage); + ADD_ITEM(lvinfo, DB___ham_metagroup_42); + ADD_ITEM(lvinfo, DB___ham_metagroup); + ADD_ITEM(lvinfo, DB___ham_groupalloc_42); + ADD_ITEM(lvinfo, DB___ham_groupalloc); + ADD_ITEM(lvinfo, DB___ham_changeslot); + ADD_ITEM(lvinfo, DB___ham_contract); + ADD_ITEM(lvinfo, DB___ham_curadj); + ADD_ITEM(lvinfo, DB___ham_chgpg); +#endif +#ifdef HAVE_QUEUE + ADD_ITEM(lvinfo, DB___qam_incfirst); + ADD_ITEM(lvinfo, DB___qam_mvptr); + ADD_ITEM(lvinfo, DB___qam_del); + ADD_ITEM(lvinfo, DB___qam_add); + ADD_ITEM(lvinfo, DB___qam_delext); +#endif + ADD_ITEM(lvinfo, DB___txn_regop_42); + ADD_ITEM(lvinfo, DB___txn_regop); + ADD_ITEM(lvinfo, DB___txn_ckp_42); + ADD_ITEM(lvinfo, DB___txn_ckp); + ADD_ITEM(lvinfo, DB___txn_child); + ADD_ITEM(lvinfo, DB___txn_xa_regop_42); + ADD_ITEM(lvinfo, DB___txn_prepare); + ADD_ITEM(lvinfo, DB___txn_recycle); + ADD_ITEM(lvinfo, DB___fop_create_42); + ADD_ITEM(lvinfo, DB___fop_create); + ADD_ITEM(lvinfo, DB___fop_remove); + ADD_ITEM(lvinfo, DB___fop_write_42); + ADD_ITEM(lvinfo, DB___fop_write); + ADD_ITEM(lvinfo, DB___fop_rename_42); + ADD_ITEM(lvinfo, DB___fop_rename_noundo_46); + ADD_ITEM(lvinfo, DB___fop_rename); + ADD_ITEM(lvinfo, DB___fop_rename_noundo); + ADD_ITEM(lvinfo, DB___fop_file_remove); +} |