diff options
author | tim@threads.polyesthetic.msg <> | 2001-03-04 19:42:05 -0500 |
---|---|---|
committer | tim@threads.polyesthetic.msg <> | 2001-03-04 19:42:05 -0500 |
commit | 89dad52004ecba5a380aeebb0e2a9beaae88eb86 (patch) | |
tree | 9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/log | |
parent | 639a1069d313843288ba6d9cb54b290073a748a7 (diff) | |
download | mariadb-git-89dad52004ecba5a380aeebb0e2a9beaae88eb86.tar.gz |
Import changeset
Diffstat (limited to 'bdb/log')
-rw-r--r-- | bdb/log/log.c | 653 | ||||
-rw-r--r-- | bdb/log/log.src | 46 | ||||
-rw-r--r-- | bdb/log/log_archive.c | 447 | ||||
-rw-r--r-- | bdb/log/log_auto.c | 326 | ||||
-rw-r--r-- | bdb/log/log_compare.c | 34 | ||||
-rw-r--r-- | bdb/log/log_findckp.c | 135 | ||||
-rw-r--r-- | bdb/log/log_get.c | 465 | ||||
-rw-r--r-- | bdb/log/log_method.c | 121 | ||||
-rw-r--r-- | bdb/log/log_put.c | 701 | ||||
-rw-r--r-- | bdb/log/log_rec.c | 621 | ||||
-rw-r--r-- | bdb/log/log_register.c | 433 |
11 files changed, 3982 insertions, 0 deletions
diff --git a/bdb/log/log.c b/bdb/log/log.c new file mode 100644 index 00000000000..69af1624824 --- /dev/null +++ b/bdb/log/log.c @@ -0,0 +1,653 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log.c,v 11.42 2001/01/15 16:42:37 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "log.h" +#include "db_dispatch.h" +#include "txn.h" +#include "txn_auto.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __log_init __P((DB_ENV *, DB_LOG *)); +static int __log_recover __P((DB_LOG *)); + +/* + * __log_open -- + * Internal version of log_open: only called from DB_ENV->open. + * + * PUBLIC: int __log_open __P((DB_ENV *)); + */ +int +__log_open(dbenv) + DB_ENV *dbenv; +{ + DB_LOG *dblp; + LOG *lp; + int ret; + u_int8_t *readbufp; + + readbufp = NULL; + + /* Create/initialize the DB_LOG structure. */ + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOG), &dblp)) != 0) + return (ret); + if ((ret = __os_calloc(dbenv, 1, dbenv->lg_bsize, &readbufp)) != 0) + goto err; + ZERO_LSN(dblp->c_lsn); + dblp->dbenv = dbenv; + + /* Join/create the log region. */ + dblp->reginfo.type = REGION_TYPE_LOG; + dblp->reginfo.id = INVALID_REGION_ID; + dblp->reginfo.mode = dbenv->db_mode; + dblp->reginfo.flags = REGION_JOIN_OK; + if (F_ISSET(dbenv, DB_ENV_CREATE)) + F_SET(&dblp->reginfo, REGION_CREATE_OK); + if ((ret = __db_r_attach( + dbenv, &dblp->reginfo, LG_BASE_REGION_SIZE + dbenv->lg_bsize)) != 0) + goto err; + + dblp->readbufp = readbufp; + + /* If we created the region, initialize it. 
*/ + if (F_ISSET(&dblp->reginfo, REGION_CREATE) && + (ret = __log_init(dbenv, dblp)) != 0) + goto err; + + /* Set the local addresses. */ + lp = dblp->reginfo.primary = + R_ADDR(&dblp->reginfo, dblp->reginfo.rp->primary); + dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off); + + /* + * If the region is threaded, then we have to lock both the handles + * and the region, and we need to allocate a mutex for that purpose. + */ + if (F_ISSET(dbenv, DB_ENV_THREAD)) { + if ((ret = __db_mutex_alloc( + dbenv, &dblp->reginfo, &dblp->mutexp)) != 0) + goto err; + if ((ret = __db_mutex_init( + dbenv, dblp->mutexp, 0, MUTEX_THREAD)) != 0) + goto err; + } + + R_UNLOCK(dbenv, &dblp->reginfo); + + dblp->r_file = 0; + dblp->r_off = 0; + dblp->r_size = 0; + dbenv->lg_handle = dblp; + return (0); + +err: if (dblp->reginfo.addr != NULL) { + if (F_ISSET(&dblp->reginfo, REGION_CREATE)) + ret = __db_panic(dbenv, ret); + R_UNLOCK(dbenv, &dblp->reginfo); + (void)__db_r_detach(dbenv, &dblp->reginfo, 0); + } + + if (readbufp != NULL) + __os_free(readbufp, dbenv->lg_bsize); + if (dblp->mutexp != NULL) + __db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp); + __os_free(dblp, sizeof(*dblp)); + return (ret); +} + +/* + * __log_init -- + * Initialize a log region in shared memory. + */ +static int +__log_init(dbenv, dblp) + DB_ENV *dbenv; + DB_LOG *dblp; +{ + LOG *region; + int ret; + void *p; + + if ((ret = __db_shalloc(dblp->reginfo.addr, + sizeof(*region), 0, &dblp->reginfo.primary)) != 0) + goto mem_err; + dblp->reginfo.rp->primary = + R_OFFSET(&dblp->reginfo, dblp->reginfo.primary); + region = dblp->reginfo.primary; + memset(region, 0, sizeof(*region)); + + region->persist.lg_max = dbenv->lg_max; + region->persist.magic = DB_LOGMAGIC; + region->persist.version = DB_LOGVERSION; + region->persist.mode = dbenv->db_mode; + SH_TAILQ_INIT(&region->fq); + + /* Initialize LOG LSNs. */ + region->lsn.file = 1; + region->lsn.offset = 0; + + /* Initialize the buffer. 
*/ + if ((ret = + __db_shalloc(dblp->reginfo.addr, dbenv->lg_bsize, 0, &p)) != 0) { +mem_err: __db_err(dbenv, "Unable to allocate memory for the log buffer"); + return (ret); + } + region->buffer_size = dbenv->lg_bsize; + region->buffer_off = R_OFFSET(&dblp->reginfo, p); + + /* Try and recover any previous log files before releasing the lock. */ + return (__log_recover(dblp)); +} + +/* + * __log_recover -- + * Recover a log. + */ +static int +__log_recover(dblp) + DB_LOG *dblp; +{ + DBT dbt; + DB_LSN lsn; + LOG *lp; + int cnt, found_checkpoint, ret; + u_int32_t chk; + logfile_validity status; + + lp = dblp->reginfo.primary; + + /* + * Find a log file. If none exist, we simply return, leaving + * everything initialized to a new log. + */ + if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0) + return (ret); + if (cnt == 0) + return (0); + + /* + * If the last file is an old version, readable or no, start a new + * file. Don't bother finding checkpoints; if we didn't take a + * checkpoint right before upgrading, the user screwed up anyway. + */ + if (status == DB_LV_OLD_READABLE || status == DB_LV_OLD_UNREADABLE) { + lp->lsn.file = lp->s_lsn.file = cnt + 1; + lp->lsn.offset = lp->s_lsn.offset = 0; + goto skipsearch; + } + DB_ASSERT(status == DB_LV_NORMAL); + + /* + * We have the last useful log file and we've loaded any persistent + * information. Set the end point of the log past the end of the last + * file. Read the last file, looking for the last checkpoint and + * the log's end. + */ + lp->lsn.file = cnt + 1; + lp->lsn.offset = 0; + lsn.file = cnt; + lsn.offset = 0; + + /* Set the cursor. Shouldn't fail; leave error messages on. */ + memset(&dbt, 0, sizeof(dbt)); + if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0) + return (ret); + + /* + * Read to the end of the file, saving checkpoints. This will fail + * at some point, so turn off error messages. 
+ */ + found_checkpoint = 0; + while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 1) == 0) { + if (dbt.size < sizeof(u_int32_t)) + continue; + memcpy(&chk, dbt.data, sizeof(u_int32_t)); + if (chk == DB_txn_ckp) { + lp->chkpt_lsn = lsn; + found_checkpoint = 1; + } + } + + /* + * We now know where the end of the log is. Set the first LSN that + * we want to return to an application and the LSN of the last known + * record on disk. + */ + lp->lsn = lsn; + lp->s_lsn = lsn; + lp->lsn.offset += dblp->c_len; + lp->s_lsn.offset += dblp->c_len; + + /* Set up the current buffer information, too. */ + lp->len = dblp->c_len; + lp->b_off = 0; + lp->w_off = lp->lsn.offset; + + /* + * It's possible that we didn't find a checkpoint because there wasn't + * one in the last log file. Start searching. + */ + if (!found_checkpoint && cnt > 1) { + lsn.file = cnt; + lsn.offset = 0; + + /* Set the cursor. Shouldn't fail, leave error messages on. */ + if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0) + return (ret); + + /* + * Read to the end of the file, saving checkpoints. Again, + * this can fail if there are no checkpoints in any log file, + * so turn error messages off. + */ + while (__log_get(dblp, &lsn, &dbt, DB_PREV, 1) == 0) { + if (dbt.size < sizeof(u_int32_t)) + continue; + memcpy(&chk, dbt.data, sizeof(u_int32_t)); + if (chk == DB_txn_ckp) { + lp->chkpt_lsn = lsn; + found_checkpoint = 1; + break; + } + } + } + + /* If we never find a checkpoint, that's okay, just 0 it out. */ + if (!found_checkpoint) +skipsearch: ZERO_LSN(lp->chkpt_lsn); + + /* + * Reset the cursor lsn to the beginning of the log, so that an + * initial call to DB_NEXT does the right thing. + */ + ZERO_LSN(dblp->c_lsn); + + if (FLD_ISSET(dblp->dbenv->verbose, DB_VERB_RECOVERY)) + __db_err(dblp->dbenv, + "Finding last valid log LSN: file: %lu offset %lu", + (u_long)lp->lsn.file, (u_long)lp->lsn.offset); + + return (0); +} + +/* + * __log_find -- + * Try to find a log file. 
If find_first is set, valp will contain + * the number of the first readable log file, else it will contain the number + * of the last log file (which may be too old to read). + * + * PUBLIC: int __log_find __P((DB_LOG *, int, int *, logfile_validity *)); + */ +int +__log_find(dblp, find_first, valp, statusp) + DB_LOG *dblp; + int find_first, *valp; + logfile_validity *statusp; +{ + logfile_validity clv_status, status; + u_int32_t clv, logval; + int cnt, fcnt, ret; + const char *dir; + char **names, *p, *q, savech; + + clv_status = status = DB_LV_NORMAL; + + /* Return a value of 0 as the log file number on failure. */ + *valp = 0; + + /* Find the directory name. */ + if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0) + return (ret); + if ((q = __db_rpath(p)) == NULL) { + COMPQUIET(savech, 0); + dir = PATH_DOT; + } else { + savech = *q; + *q = '\0'; + dir = p; + } + + /* Get the list of file names. */ + ret = __os_dirlist(dblp->dbenv, dir, &names, &fcnt); + + /* + * !!! + * We overwrote a byte in the string with a nul. Restore the string + * so that the diagnostic checks in the memory allocation code work + * and any error messages display the right file name. + */ + if (q != NULL) + *q = savech; + + if (ret != 0) { + __db_err(dblp->dbenv, "%s: %s", dir, db_strerror(ret)); + __os_freestr(p); + return (ret); + } + + /* Search for a valid log file name. */ + for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) { + if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0) + continue; + + /* + * Use atol, not atoi; if an "int" is 16-bits, the largest + * log file name won't fit. + */ + clv = atol(names[cnt] + (sizeof(LFPREFIX) - 1)); + if (find_first) { + if (logval != 0 && clv > logval) + continue; + } else + if (logval != 0 && clv < logval) + continue; + + /* + * Take note of whether the log file logval is + * an old version or incompletely initialized. 
+ */ + if ((ret = __log_valid(dblp, clv, 1, &status)) != 0) + goto err; + switch (status) { + case DB_LV_INCOMPLETE: + /* + * It's acceptable for the last log file to + * have been incompletely initialized--it's possible + * to create a log file but not write anything to it, + * and recovery needs to gracefully handle this. + * + * Just ignore it; we don't want to return this + * as a valid log file. + */ + break; + case DB_LV_NORMAL: + case DB_LV_OLD_READABLE: + logval = clv; + clv_status = status; + break; + case DB_LV_OLD_UNREADABLE: + /* + * Continue; we want the oldest valid log, + * and clv is too old to be useful. We don't + * want it to supplant logval if we're looking for + * the oldest valid log, but we do want to return + * it if it's the last log file--we want the very + * last file number, so that our caller can + * start a new file after it. + * + * The code here assumes that there will never + * be a too-old log that's preceded by a log + * of the current version, but in order to + * attain that state of affairs the user + * would have had to really seriously screw + * up; I think we can safely assume this won't + * happen. + */ + if (!find_first) { + logval = clv; + clv_status = status; + } + break; + } + } + + *valp = logval; + +err: __os_dirfree(names, fcnt); + __os_freestr(p); + *statusp = clv_status; + + return (ret); +} + +/* + * __log_valid -- + * Validate a log file. Returns an error code in the event of + * a fatal flaw in the specified log file; returns success with + * a code indicating the currentness and completeness of the specified + * log file if it is not unexpectedly flawed (that is, if it's perfectly + * normal, if it's zero-length, or if it's an old version). 
+ * + * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int, logfile_validity *)); + */ +int +__log_valid(dblp, number, set_persist, statusp) + DB_LOG *dblp; + u_int32_t number; + int set_persist; + logfile_validity *statusp; +{ + DB_FH fh; + LOG *region; + LOGP persist; + char *fname; + int ret; + logfile_validity status; + size_t nw; + + status = DB_LV_NORMAL; + + /* Try to open the log file. */ + if ((ret = __log_name(dblp, + number, &fname, &fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { + __os_freestr(fname); + return (ret); + } + + /* Try to read the header. */ + if ((ret = + __os_seek(dblp->dbenv, + &fh, 0, 0, sizeof(HDR), 0, DB_OS_SEEK_SET)) != 0 || + (ret = + __os_read(dblp->dbenv, &fh, &persist, sizeof(LOGP), &nw)) != 0 || + nw != sizeof(LOGP)) { + if (ret == 0) + status = DB_LV_INCOMPLETE; + else + /* + * The error was a fatal read error, not just an + * incompletely initialized log file. + */ + __db_err(dblp->dbenv, "Ignoring log file: %s: %s", + fname, db_strerror(ret)); + + (void)__os_closehandle(&fh); + goto err; + } + (void)__os_closehandle(&fh); + + /* Validate the header. */ + if (persist.magic != DB_LOGMAGIC) { + __db_err(dblp->dbenv, + "Ignoring log file: %s: magic number %lx, not %lx", + fname, (u_long)persist.magic, (u_long)DB_LOGMAGIC); + ret = EINVAL; + goto err; + } + + /* + * Set our status code to indicate whether the log file + * belongs to an unreadable or readable old version; leave it + * alone if and only if the log file version is the current one. + */ + if (persist.version > DB_LOGVERSION) { + /* This is a fatal error--the log file is newer than DB. */ + __db_err(dblp->dbenv, + "Ignoring log file: %s: unsupported log version %lu", + fname, (u_long)persist.version); + ret = EINVAL; + goto err; + } else if (persist.version < DB_LOGOLDVER) { + status = DB_LV_OLD_UNREADABLE; + /* + * We don't want to set persistent info based on an + * unreadable region, so jump to "err". 
+ */ + goto err; + } else if (persist.version < DB_LOGVERSION) + status = DB_LV_OLD_READABLE; + + /* + * If the log is thus far readable and we're doing system + * initialization, set the region's persistent information + * based on the headers. + */ + if (set_persist) { + region = dblp->reginfo.primary; + region->persist.lg_max = persist.lg_max; + region->persist.mode = persist.mode; + } + +err: __os_freestr(fname); + *statusp = status; + return (ret); +} + +/* + * __log_close -- + * Internal version of log_close: only called from dbenv_refresh. + * + * PUBLIC: int __log_close __P((DB_ENV *)); + */ +int +__log_close(dbenv) + DB_ENV *dbenv; +{ + DB_LOG *dblp; + int ret, t_ret; + + ret = 0; + dblp = dbenv->lg_handle; + + /* We may have opened files as part of XA; if so, close them. */ + F_SET(dblp, DBLOG_RECOVER); + __log_close_files(dbenv); + + /* Discard the per-thread lock. */ + if (dblp->mutexp != NULL) + __db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp); + + /* Detach from the region. */ + ret = __db_r_detach(dbenv, &dblp->reginfo, 0); + + /* Close open files, release allocated memory. */ + if (F_ISSET(&dblp->lfh, DB_FH_VALID) && + (t_ret = __os_closehandle(&dblp->lfh)) != 0 && ret == 0) + ret = t_ret; + if (dblp->c_dbt.data != NULL) + __os_free(dblp->c_dbt.data, dblp->c_dbt.ulen); + if (F_ISSET(&dblp->c_fh, DB_FH_VALID) && + (t_ret = __os_closehandle(&dblp->c_fh)) != 0 && ret == 0) + ret = t_ret; + if (dblp->dbentry != NULL) + __os_free(dblp->dbentry, + (dblp->dbentry_cnt * sizeof(DB_ENTRY))); + if (dblp->readbufp != NULL) + __os_free(dblp->readbufp, dbenv->lg_bsize); + + __os_free(dblp, sizeof(*dblp)); + + dbenv->lg_handle = NULL; + return (ret); +} + +/* + * log_stat -- + * Return LOG statistics. 
+ */ +int +log_stat(dbenv, statp, db_malloc) + DB_ENV *dbenv; + DB_LOG_STAT **statp; + void *(*db_malloc) __P((size_t)); +{ + DB_LOG *dblp; + DB_LOG_STAT *stats; + LOG *region; + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_stat(dbenv, statp, db_malloc)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + *statp = NULL; + + dblp = dbenv->lg_handle; + region = dblp->reginfo.primary; + + if ((ret = __os_malloc(dbenv, + sizeof(DB_LOG_STAT), db_malloc, &stats)) != 0) + return (ret); + + /* Copy out the global statistics. */ + R_LOCK(dbenv, &dblp->reginfo); + *stats = region->stat; + + stats->st_magic = region->persist.magic; + stats->st_version = region->persist.version; + stats->st_mode = region->persist.mode; + stats->st_lg_bsize = region->buffer_size; + stats->st_lg_max = region->persist.lg_max; + + stats->st_region_wait = dblp->reginfo.rp->mutex.mutex_set_wait; + stats->st_region_nowait = dblp->reginfo.rp->mutex.mutex_set_nowait; + stats->st_regsize = dblp->reginfo.rp->size; + + stats->st_cur_file = region->lsn.file; + stats->st_cur_offset = region->lsn.offset; + + R_UNLOCK(dbenv, &dblp->reginfo); + + *statp = stats; + return (0); +} + +/* + * __log_lastckp -- + * Return the current chkpt_lsn, so that we can store it in + * the transaction region and keep the chain of checkpoints + * unbroken across environment recreates. + * + * PUBLIC: int __log_lastckp __P((DB_ENV *, DB_LSN *)); + */ +int +__log_lastckp(dbenv, lsnp) + DB_ENV *dbenv; + DB_LSN *lsnp; +{ + LOG *lp; + + lp = (LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary); + + *lsnp = lp->chkpt_lsn; + return (0); +} diff --git a/bdb/log/log.src b/bdb/log/log.src new file mode 100644 index 00000000000..a92fae8de26 --- /dev/null +++ b/bdb/log/log.src @@ -0,0 +1,46 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ * + * $Id: log.src,v 10.12 2000/02/17 20:24:10 bostic Exp $ + */ + +PREFIX log + +INCLUDE #include "db_config.h" +INCLUDE +INCLUDE #ifndef NO_SYSTEM_INCLUDES +INCLUDE #include <sys/types.h> +INCLUDE +INCLUDE #include <ctype.h> +INCLUDE #include <errno.h> +INCLUDE #include <string.h> +INCLUDE #endif +INCLUDE +INCLUDE #include "db_int.h" +INCLUDE #include "db_page.h" +INCLUDE #include "db_dispatch.h" +INCLUDE #include "db_am.h" +INCLUDE #include "log.h" +INCLUDE #include "txn.h" +INCLUDE + +/* Used for registering name/id translations at open or close. */ +DEPRECATED register1 1 +ARG opcode u_int32_t lu +DBT name DBT s +DBT uid DBT s +ARG fileid int32_t ld +ARG ftype DBTYPE lx +END + +BEGIN register 2 +ARG opcode u_int32_t lu +DBT name DBT s +DBT uid DBT s +ARG fileid int32_t ld +ARG ftype DBTYPE lx +ARG meta_pgno db_pgno_t lu +END diff --git a/bdb/log/log_archive.c b/bdb/log/log_archive.c new file mode 100644 index 00000000000..83728c79e55 --- /dev/null +++ b/bdb/log/log_archive.c @@ -0,0 +1,447 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_archive.c,v 11.13 2000/11/30 00:58:40 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_dispatch.h" +#include "log.h" +#include "clib_ext.h" /* XXX: needed for getcwd. 
*/ + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __absname __P((DB_ENV *, char *, char *, char **)); +static int __build_data __P((DB_ENV *, char *, char ***, void *(*)(size_t))); +static int __cmpfunc __P((const void *, const void *)); +static int __usermem __P((DB_ENV *, char ***, void *(*)(size_t))); + +/* + * log_archive -- + * Supporting function for db_archive(1). + */ +int +log_archive(dbenv, listp, flags, db_malloc) + DB_ENV *dbenv; + char ***listp; + u_int32_t flags; + void *(*db_malloc) __P((size_t)); +{ + DBT rec; + DB_LOG *dblp; + DB_LSN stable_lsn; + u_int32_t fnum; + int array_size, n, ret; + char **array, **arrayp, *name, *p, *pref, buf[MAXPATHLEN]; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_archive(dbenv, listp, flags, db_malloc)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + name = NULL; + dblp = dbenv->lg_handle; + COMPQUIET(fnum, 0); + +#define OKFLAGS (DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG) + if (flags != 0) { + if ((ret = + __db_fchk(dbenv, "log_archive", flags, OKFLAGS)) != 0) + return (ret); + if ((ret = + __db_fcchk(dbenv, + "log_archive", flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0) + return (ret); + } + + /* + * Get the absolute pathname of the current directory. It would + * be nice to get the shortest pathname of the database directory, + * but that's just not possible. + * + * XXX + * Can't trust getcwd(3) to set a valid errno. If it doesn't, just + * guess that we ran out of memory. 
+ */ + if (LF_ISSET(DB_ARCH_ABS)) { + __os_set_errno(0); + if ((pref = getcwd(buf, sizeof(buf))) == NULL) { + if (__os_get_errno() == 0) + __os_set_errno(ENOMEM); + return (__os_get_errno()); + } + } else + pref = NULL; + + switch (LF_ISSET(~DB_ARCH_ABS)) { + case DB_ARCH_DATA: + return (__build_data(dbenv, pref, listp, db_malloc)); + case DB_ARCH_LOG: + memset(&rec, 0, sizeof(rec)); + if (F_ISSET(dbenv, DB_ENV_THREAD)) + F_SET(&rec, DB_DBT_MALLOC); + if ((ret = log_get(dbenv, &stable_lsn, &rec, DB_LAST)) != 0) + return (ret); + if (F_ISSET(dbenv, DB_ENV_THREAD)) + __os_free(rec.data, rec.size); + fnum = stable_lsn.file; + break; + case 0: + if ((ret = __log_findckp(dbenv, &stable_lsn)) != 0) { + /* + * A return of DB_NOTFOUND means that we didn't find + * any records in the log (so we are not going to be + * deleting any log files). + */ + if (ret != DB_NOTFOUND) + return (ret); + *listp = NULL; + return (0); + } + /* Remove any log files before the last stable LSN. */ + fnum = stable_lsn.file - 1; + break; + } + +#define LIST_INCREMENT 64 + /* Get some initial space. */ + array_size = 10; + if ((ret = __os_malloc(dbenv, + sizeof(char *) * array_size, NULL, &array)) != 0) + return (ret); + array[0] = NULL; + + /* Build an array of the file names. 
*/ + for (n = 0; fnum > 0; --fnum) { + if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) + goto err; + if (__os_exists(name, NULL) != 0) { + if (LF_ISSET(DB_ARCH_LOG) && fnum == stable_lsn.file) + continue; + __os_freestr(name); + name = NULL; + break; + } + + if (n >= array_size - 1) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(dbenv, + sizeof(char *) * array_size, NULL, &array)) != 0) + goto err; + } + + if (LF_ISSET(DB_ARCH_ABS)) { + if ((ret = __absname(dbenv, + pref, name, &array[n])) != 0) + goto err; + __os_freestr(name); + } else if ((p = __db_rpath(name)) != NULL) { + if ((ret = __os_strdup(dbenv, p + 1, &array[n])) != 0) + goto err; + __os_freestr(name); + } else + array[n] = name; + + name = NULL; + array[++n] = NULL; + } + + /* If there's nothing to return, we're done. */ + if (n == 0) { + *listp = NULL; + ret = 0; + goto err; + } + + /* Sort the list. */ + qsort(array, (size_t)n, sizeof(char *), __cmpfunc); + + /* Rework the memory. */ + if ((ret = __usermem(dbenv, &array, db_malloc)) != 0) + goto err; + + *listp = array; + return (0); + +err: if (array != NULL) { + for (arrayp = array; *arrayp != NULL; ++arrayp) + __os_freestr(*arrayp); + __os_free(array, sizeof(char *) * array_size); + } + if (name != NULL) + __os_freestr(name); + return (ret); +} + +/* + * __build_data -- + * Build a list of datafiles for return. + */ +static int +__build_data(dbenv, pref, listp, db_malloc) + DB_ENV *dbenv; + char *pref, ***listp; + void *(*db_malloc) __P((size_t)); +{ + DBT rec; + DB_LSN lsn; + __log_register_args *argp; + u_int32_t rectype; + int array_size, last, n, nxt, ret; + char **array, **arrayp, *p, *real_name; + + /* Get some initial space. 
*/ + array_size = 10; + if ((ret = __os_malloc(dbenv, + sizeof(char *) * array_size, NULL, &array)) != 0) + return (ret); + array[0] = NULL; + + memset(&rec, 0, sizeof(rec)); + if (F_ISSET(dbenv, DB_ENV_THREAD)) + F_SET(&rec, DB_DBT_MALLOC); + for (n = 0, ret = log_get(dbenv, &lsn, &rec, DB_FIRST); + ret == 0; ret = log_get(dbenv, &lsn, &rec, DB_NEXT)) { + if (rec.size < sizeof(rectype)) { + ret = EINVAL; + __db_err(dbenv, "log_archive: bad log record"); + goto lg_free; + } + + memcpy(&rectype, rec.data, sizeof(rectype)); + if (rectype != DB_log_register) { + if (F_ISSET(dbenv, DB_ENV_THREAD)) { + __os_free(rec.data, rec.size); + rec.data = NULL; + } + continue; + } + if ((ret = __log_register_read(dbenv, rec.data, &argp)) != 0) { + ret = EINVAL; + __db_err(dbenv, + "log_archive: unable to read log record"); + goto lg_free; + } + + if (n >= array_size - 1) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(dbenv, + sizeof(char *) * array_size, NULL, &array)) != 0) + goto lg_free; + } + + if ((ret = __os_strdup(dbenv, + argp->name.data, &array[n])) != 0) { +lg_free: if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL) + __os_free(rec.data, rec.size); + goto err1; + } + + array[++n] = NULL; + __os_free(argp, 0); + + if (F_ISSET(dbenv, DB_ENV_THREAD)) { + __os_free(rec.data, rec.size); + rec.data = NULL; + } + } + + /* If there's nothing to return, we're done. */ + if (n == 0) { + ret = 0; + *listp = NULL; + goto err1; + } + + /* Sort the list. */ + qsort(array, (size_t)n, sizeof(char *), __cmpfunc); + + /* + * Build the real pathnames, discarding nonexistent files and + * duplicates. + */ + for (last = nxt = 0; nxt < n;) { + /* + * Discard duplicates. Last is the next slot we're going + * to return to the user, nxt is the next slot that we're + * going to consider. 
+ */ + if (last != nxt) { + array[last] = array[nxt]; + array[nxt] = NULL; + } + for (++nxt; nxt < n && + strcmp(array[last], array[nxt]) == 0; ++nxt) { + __os_freestr(array[nxt]); + array[nxt] = NULL; + } + + /* Get the real name. */ + if ((ret = __db_appname(dbenv, + DB_APP_DATA, NULL, array[last], 0, NULL, &real_name)) != 0) + goto err2; + + /* If the file doesn't exist, ignore it. */ + if (__os_exists(real_name, NULL) != 0) { + __os_freestr(real_name); + __os_freestr(array[last]); + array[last] = NULL; + continue; + } + + /* Rework the name as requested by the user. */ + __os_freestr(array[last]); + array[last] = NULL; + if (pref != NULL) { + ret = __absname(dbenv, pref, real_name, &array[last]); + __os_freestr(real_name); + if (ret != 0) + goto err2; + } else if ((p = __db_rpath(real_name)) != NULL) { + ret = __os_strdup(dbenv, p + 1, &array[last]); + __os_freestr(real_name); + if (ret != 0) + goto err2; + } else + array[last] = real_name; + ++last; + } + + /* NULL-terminate the list. */ + array[last] = NULL; + + /* Rework the memory. */ + if ((ret = __usermem(dbenv, &array, db_malloc)) != 0) + goto err1; + + *listp = array; + return (0); + +err2: /* + * XXX + * We've possibly inserted NULLs into the array list, so clean up a + * bit so that the other error processing works. + */ + if (array != NULL) + for (; nxt < n; ++nxt) + __os_freestr(array[nxt]); + /* FALLTHROUGH */ + +err1: if (array != NULL) { + for (arrayp = array; *arrayp != NULL; ++arrayp) + __os_freestr(*arrayp); + __os_free(array, array_size * sizeof(char *)); + } + return (ret); +} + +/* + * __absname -- + * Return an absolute path name for the file. + */ +static int +__absname(dbenv, pref, name, newnamep) + DB_ENV *dbenv; + char *pref, *name, **newnamep; +{ + size_t l_pref, l_name; + int isabspath, ret; + char *newname; + + l_name = strlen(name); + isabspath = __os_abspath(name); + l_pref = isabspath ? 0 : strlen(pref); + + /* Malloc space for concatenating the two. 
*/ + if ((ret = __os_malloc(dbenv, + l_pref + l_name + 2, NULL, &newname)) != 0) + return (ret); + *newnamep = newname; + + /* Build the name. If `name' is an absolute path, ignore any prefix. */ + if (!isabspath) { + memcpy(newname, pref, l_pref); + if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL) + newname[l_pref++] = PATH_SEPARATOR[0]; + } + memcpy(newname + l_pref, name, l_name + 1); + + return (0); +} + +/* + * __usermem -- + * Create a single chunk of memory that holds the returned information. + * If the user has their own malloc routine, use it. + */ +static int +__usermem(dbenv, listp, db_malloc) + DB_ENV *dbenv; + char ***listp; + void *(*db_malloc) __P((size_t)); +{ + size_t len; + int ret; + char **array, **arrayp, **orig, *strp; + + /* Find out how much space we need. */ + for (len = 0, orig = *listp; *orig != NULL; ++orig) + len += sizeof(char *) + strlen(*orig) + 1; + len += sizeof(char *); + + /* Allocate it and set up the pointers. */ + if ((ret = __os_malloc(dbenv, len, db_malloc, &array)) != 0) + return (ret); + + strp = (char *)(array + (orig - *listp) + 1); + + /* Copy the original information into the new memory. */ + for (orig = *listp, arrayp = array; *orig != NULL; ++orig, ++arrayp) { + len = strlen(*orig); + memcpy(strp, *orig, len + 1); + *arrayp = strp; + strp += len + 1; + + __os_freestr(*orig); + } + + /* NULL-terminate the list. */ + *arrayp = NULL; + + __os_free(*listp, 0); + *listp = array; + + return (0); +} + +static int +__cmpfunc(p1, p2) + const void *p1, *p2; +{ + return (strcmp(*((char * const *)p1), *((char * const *)p2))); +} diff --git a/bdb/log/log_auto.c b/bdb/log/log_auto.c new file mode 100644 index 00000000000..281296cc238 --- /dev/null +++ b/bdb/log/log_auto.c @@ -0,0 +1,326 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ +#include "db_config.h" + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_dispatch.h" +#include "db_am.h" +#include "log.h" +#include "txn.h" + +int +__log_register1_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __log_register1_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __log_register1_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]log_register1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tname: "); + for (i = 0; i < argp->name.size; i++) { + ch = ((u_int8_t *)argp->name.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tuid: "); + for (i = 0; i < argp->uid.size; i++) { + ch = ((u_int8_t *)argp->uid.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tftype: 0x%lx\n", (u_long)argp->ftype); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__log_register1_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __log_register1_args **argpp; +{ + __log_register1_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__log_register1_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += 
sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memset(&argp->name, 0, sizeof(argp->name)); + memcpy(&argp->name.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->name.data = bp; + bp += argp->name.size; + memset(&argp->uid, 0, sizeof(argp->uid)); + memcpy(&argp->uid.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->uid.data = bp; + bp += argp->uid.size; + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->ftype, bp, sizeof(argp->ftype)); + bp += sizeof(argp->ftype); + *argpp = argp; + return (0); +} + +int +__log_register_log(dbenv, txnid, ret_lsnp, flags, + opcode, name, uid, fileid, ftype, meta_pgno) + DB_ENV *dbenv; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + const DBT *name; + const DBT *uid; + int32_t fileid; + DBTYPE ftype; + db_pgno_t meta_pgno; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_log_register; + if (txnid != NULL && + TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + ZERO_LSN(null_lsn); + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(u_int32_t) + (name == NULL ? 0 : name->size) + + sizeof(u_int32_t) + (uid == NULL ? 
0 : uid->size) + + sizeof(fileid) + + sizeof(ftype) + + sizeof(meta_pgno); + if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + return (ret); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + if (name == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &name->size, sizeof(name->size)); + bp += sizeof(name->size); + memcpy(bp, name->data, name->size); + bp += name->size; + } + if (uid == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &uid->size, sizeof(uid->size)); + bp += sizeof(uid->size); + memcpy(bp, uid->data, uid->size); + bp += uid->size; + } + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &ftype, sizeof(ftype)); + bp += sizeof(ftype); + memcpy(bp, &meta_pgno, sizeof(meta_pgno)); + bp += sizeof(meta_pgno); + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); + ret = __log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __os_free(logrec.data, logrec.size); + return (ret); +} + +int +__log_register_print(dbenv, dbtp, lsnp, notused2, notused3) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __log_register_args *argp; + u_int32_t i; + u_int ch; + int ret; + + i = 0; + ch = 0; + notused2 = DB_TXN_ABORT; + notused3 = NULL; + + if ((ret = __log_register_read(dbenv, dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]log_register: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: 
%lu\n", (u_long)argp->opcode); + printf("\tname: "); + for (i = 0; i < argp->name.size; i++) { + ch = ((u_int8_t *)argp->name.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tuid: "); + for (i = 0; i < argp->uid.size; i++) { + ch = ((u_int8_t *)argp->uid.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); + else + printf("%#x ", ch); + } + printf("\n"); + printf("\tfileid: %ld\n", (long)argp->fileid); + printf("\tftype: 0x%lx\n", (u_long)argp->ftype); + printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); + printf("\n"); + __os_free(argp, 0); + return (0); +} + +int +__log_register_read(dbenv, recbuf, argpp) + DB_ENV *dbenv; + void *recbuf; + __log_register_args **argpp; +{ + __log_register_args *argp; + u_int8_t *bp; + int ret; + + ret = __os_malloc(dbenv, sizeof(__log_register_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memset(&argp->name, 0, sizeof(argp->name)); + memcpy(&argp->name.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->name.data = bp; + bp += argp->name.size; + memset(&argp->uid, 0, sizeof(argp->uid)); + memcpy(&argp->uid.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->uid.data = bp; + bp += argp->uid.size; + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->ftype, bp, sizeof(argp->ftype)); + bp += sizeof(argp->ftype); + memcpy(&argp->meta_pgno, bp, sizeof(argp->meta_pgno)); + bp += sizeof(argp->meta_pgno); + *argpp = argp; + return (0); +} + +int +__log_init_print(dbenv) + DB_ENV *dbenv; +{ + int 
ret; + + if ((ret = __db_add_recovery(dbenv, + __log_register1_print, DB_log_register1)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __log_register_print, DB_log_register)) != 0) + return (ret); + return (0); +} + +int +__log_init_recover(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __deprecated_recover, DB_log_register1)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __log_register_recover, DB_log_register)) != 0) + return (ret); + return (0); +} + diff --git a/bdb/log/log_compare.c b/bdb/log/log_compare.c new file mode 100644 index 00000000000..9bc3c028a5f --- /dev/null +++ b/bdb/log/log_compare.c @@ -0,0 +1,34 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_compare.c,v 11.3 2000/02/14 02:59:59 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" + +/* + * log_compare -- + * Compare two LSN's; return 1, 0, -1 if first is >, == or < second. + */ +int +log_compare(lsn0, lsn1) + const DB_LSN *lsn0, *lsn1; +{ + if (lsn0->file != lsn1->file) + return (lsn0->file < lsn1->file ? -1 : 1); + + if (lsn0->offset != lsn1->offset) + return (lsn0->offset < lsn1->offset ? -1 : 1); + + return (0); +} diff --git a/bdb/log/log_findckp.c b/bdb/log/log_findckp.c new file mode 100644 index 00000000000..b1e8fddbdb7 --- /dev/null +++ b/bdb/log/log_findckp.c @@ -0,0 +1,135 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_findckp.c,v 11.5 2000/11/30 00:58:40 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "log.h" +#include "txn.h" + +/* + * __log_findckp -- + * + * Looks for the most recent checkpoint that occurs before the most recent + * checkpoint LSN, subject to the constraint that there must be at least two + * checkpoints. The reason you need two checkpoints is that you might have + * crashed during the most recent one and may not have a copy of all the + * open files. This is the point from which recovery can start and the + * point up to which archival/truncation can take place. Checkpoints in + * the log look like: + * + * ------------------------------------------------------------------- + * | ckp A, ckplsn 100 | .... record .... | ckp B, ckplsn 600 | ... + * ------------------------------------------------------------------- + * LSN 500 LSN 1000 + * + * If we read what log returns from using the DB_CKP parameter to logput, + * we'll get the record at LSN 1000. The checkpoint LSN there is 600. + * Now we have to scan backwards looking for a checkpoint before LSN 600. + * We find one at 500. This means that we can truncate the log before + * 500 or run recovery beginning at 500. + * + * Returns 0 if we find a suitable checkpoint or we retrieved the first + * record in the log from which to start. Returns DB_NOTFOUND if there + * are no log records, errno on error. + * + * PUBLIC: int __log_findckp __P((DB_ENV *, DB_LSN *)); + */ +int +__log_findckp(dbenv, lsnp) + DB_ENV *dbenv; + DB_LSN *lsnp; +{ + DBT data; + DB_LSN ckp_lsn, final_ckp, last_ckp, next_lsn; + __txn_ckp_args *ckp_args; + int ret; + + /* + * Need to find the appropriate point from which to begin + * recovery. 
+ */ + memset(&data, 0, sizeof(data)); + if (F_ISSET(dbenv, DB_ENV_THREAD)) + F_SET(&data, DB_DBT_MALLOC); + ZERO_LSN(ckp_lsn); + if ((ret = log_get(dbenv, &last_ckp, &data, DB_CHECKPOINT)) != 0) { + if (ret == ENOENT) + goto get_first; + else + return (ret); + } + final_ckp = last_ckp; + + next_lsn = last_ckp; + do { + if (F_ISSET(dbenv, DB_ENV_THREAD)) + __os_free(data.data, data.size); + + if ((ret = log_get(dbenv, &next_lsn, &data, DB_SET)) != 0) + return (ret); + if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) { + if (F_ISSET(dbenv, DB_ENV_THREAD)) + __os_free(data.data, data.size); + return (ret); + } + if (IS_ZERO_LSN(ckp_lsn)) + ckp_lsn = ckp_args->ckp_lsn; + if (FLD_ISSET(dbenv->verbose, DB_VERB_CHKPOINT)) { + __db_err(dbenv, "Checkpoint at: [%lu][%lu]", + (u_long)last_ckp.file, (u_long)last_ckp.offset); + __db_err(dbenv, "Checkpoint LSN: [%lu][%lu]", + (u_long)ckp_args->ckp_lsn.file, + (u_long)ckp_args->ckp_lsn.offset); + __db_err(dbenv, "Previous checkpoint: [%lu][%lu]", + (u_long)ckp_args->last_ckp.file, + (u_long)ckp_args->last_ckp.offset); + } + last_ckp = next_lsn; + next_lsn = ckp_args->last_ckp; + __os_free(ckp_args, sizeof(*ckp_args)); + + /* + * Keep looping until either you 1) run out of checkpoints, + * 2) you've found a checkpoint before the most recent + * checkpoint's LSN and you have at least 2 checkpoints. + */ + } while (!IS_ZERO_LSN(next_lsn) && + (log_compare(&last_ckp, &ckp_lsn) > 0 || + log_compare(&final_ckp, &last_ckp) == 0)); + + if (F_ISSET(dbenv, DB_ENV_THREAD)) + __os_free(data.data, data.size); + + /* + * At this point, either, next_lsn is ZERO or ckp_lsn is the + * checkpoint lsn and last_ckp is the LSN of the last checkpoint + * before ckp_lsn. If the compare in the loop is still true, then + * next_lsn must be 0 and we need to roll forward from the + * beginning of the log. 
+ */ + if (log_compare(&last_ckp, &ckp_lsn) >= 0 || + log_compare(&final_ckp, &last_ckp) == 0) { +get_first: if ((ret = log_get(dbenv, &last_ckp, &data, DB_FIRST)) != 0) + return (ret); + if (F_ISSET(dbenv, DB_ENV_THREAD)) + __os_free(data.data, data.size); + } + *lsnp = last_ckp; + + return (IS_ZERO_LSN(last_ckp) ? DB_NOTFOUND : 0); +} diff --git a/bdb/log/log_get.c b/bdb/log/log_get.c new file mode 100644 index 00000000000..b75d50a62fd --- /dev/null +++ b/bdb/log/log_get.c @@ -0,0 +1,465 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_get.c,v 11.32 2001/01/11 18:19:53 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_page.h" +#include "log.h" +#include "hash.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +/* + * log_get -- + * Get a log record. + */ +int +log_get(dbenv, alsn, dbt, flags) + DB_ENV *dbenv; + DB_LSN *alsn; + DBT *dbt; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LSN saved_lsn; + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_get(dbenv, alsn, dbt, flags)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + /* Validate arguments. 
*/ + if (flags != DB_CHECKPOINT && flags != DB_CURRENT && + flags != DB_FIRST && flags != DB_LAST && + flags != DB_NEXT && flags != DB_PREV && flags != DB_SET) + return (__db_ferr(dbenv, "log_get", 1)); + + if (F_ISSET(dbenv, DB_ENV_THREAD)) { + if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT) + return (__db_ferr(dbenv, "log_get", 1)); + if (!F_ISSET(dbt, + DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERMEM)) + return (__db_ferr(dbenv, "threaded data", 1)); + } + + dblp = dbenv->lg_handle; + R_LOCK(dbenv, &dblp->reginfo); + + /* + * The alsn field is only initialized if DB_SET is the flag, so this + * assignment causes uninitialized memory complaints for other flag + * values. + */ +#ifdef UMRW + if (flags == DB_SET) + saved_lsn = *alsn; + else + ZERO_LSN(saved_lsn); +#else + saved_lsn = *alsn; +#endif + + /* + * If we get one of the log's header records, repeat the operation. + * This assumes that applications don't ever request the log header + * records by LSN, but that seems reasonable to me. + */ + if ((ret = __log_get(dblp, + alsn, dbt, flags, 0)) == 0 && alsn->offset == 0) { + switch (flags) { + case DB_FIRST: + flags = DB_NEXT; + break; + case DB_LAST: + flags = DB_PREV; + break; + } + if (F_ISSET(dbt, DB_DBT_MALLOC)) { + __os_free(dbt->data, dbt->size); + dbt->data = NULL; + } + ret = __log_get(dblp, alsn, dbt, flags, 0); + } + if (ret != 0) + *alsn = saved_lsn; + + R_UNLOCK(dbenv, &dblp->reginfo); + + return (ret); +} + +/* + * __log_get -- + * Get a log record; internal version. 
+ * + * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); + */ +int +__log_get(dblp, alsn, dbt, flags, silent) + DB_LOG *dblp; + DB_LSN *alsn; + DBT *dbt; + u_int32_t flags; + int silent; +{ + DB_ENV *dbenv; + DB_LSN nlsn; + HDR hdr; + LOG *lp; + const char *fail; + char *np, *tbuf; + int cnt, ret; + logfile_validity status; + size_t len, nr; + u_int32_t offset; + u_int8_t *p; + void *shortp, *readp; + + lp = dblp->reginfo.primary; + fail = np = tbuf = NULL; + dbenv = dblp->dbenv; + + nlsn = dblp->c_lsn; + switch (flags) { + case DB_CHECKPOINT: + nlsn = lp->chkpt_lsn; + if (IS_ZERO_LSN(nlsn)) { + /* No db_err. The caller may expect this. */ + ret = ENOENT; + goto err2; + } + break; + case DB_NEXT: /* Next log record. */ + if (!IS_ZERO_LSN(nlsn)) { + /* Increment the cursor by the cursor record size. */ + nlsn.offset += dblp->c_len; + break; + } + /* FALLTHROUGH */ + case DB_FIRST: /* Find the first log record. */ + /* Find the first log file. */ + if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0) + goto err2; + + /* + * We want any readable version, so either DB_LV_NORMAL + * or DB_LV_OLD_READABLE is acceptable here. If it's + * not one of those two, there is no first log record that + * we can read. + */ + if (status != DB_LV_NORMAL && status != DB_LV_OLD_READABLE) { + ret = DB_NOTFOUND; + goto err2; + } + + /* + * We may have only entered records in the buffer, and not + * yet written a log file. If no log files were found and + * there's anything in the buffer, it belongs to file 1. + */ + if (cnt == 0) + cnt = 1; + + nlsn.file = cnt; + nlsn.offset = 0; + break; + case DB_CURRENT: /* Current log record. */ + break; + case DB_PREV: /* Previous log record. */ + if (!IS_ZERO_LSN(nlsn)) { + /* If at start-of-file, move to the previous file. 
*/ + if (nlsn.offset == 0) { + if (nlsn.file == 1 || + __log_valid(dblp, + nlsn.file - 1, 0, &status) != 0) + return (DB_NOTFOUND); + + if (status != DB_LV_NORMAL && + status != DB_LV_OLD_READABLE) + return (DB_NOTFOUND); + + --nlsn.file; + nlsn.offset = dblp->c_off; + } else + nlsn.offset = dblp->c_off; + break; + } + /* FALLTHROUGH */ + case DB_LAST: /* Last log record. */ + nlsn.file = lp->lsn.file; + nlsn.offset = lp->lsn.offset - lp->len; + break; + case DB_SET: /* Set log record. */ + nlsn = *alsn; + break; + } + + if (0) { /* Move to the next file. */ +next_file: ++nlsn.file; + nlsn.offset = 0; + } + + /* Return 1 if the request is past the end of the log. */ + if (nlsn.file > lp->lsn.file || + (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset)) + return (DB_NOTFOUND); + + /* If we've switched files, discard the current file handle. */ + if (dblp->c_lsn.file != nlsn.file && + F_ISSET(&dblp->c_fh, DB_FH_VALID)) { + (void)__os_closehandle(&dblp->c_fh); + } + + /* If the entire record is in the in-memory buffer, copy it out. */ + if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) { + /* Copy the header. */ + p = dblp->bufp + (nlsn.offset - lp->w_off); + memcpy(&hdr, p, sizeof(HDR)); + + /* Copy the record. */ + len = hdr.len - sizeof(HDR); + if ((ret = __db_retcopy(NULL, dbt, p + sizeof(HDR), + len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + goto cksum; + } + + shortp = NULL; + + /* Acquire a file descriptor. */ + if (!F_ISSET(&dblp->c_fh, DB_FH_VALID)) { + if ((ret = __log_name(dblp, nlsn.file, + &np, &dblp->c_fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { + fail = np; + goto err1; + } + __os_freestr(np); + np = NULL; + } + + /* See if we've already read this */ + if (nlsn.file == dblp->r_file && nlsn.offset > dblp->r_off + && nlsn.offset + sizeof(HDR) < dblp->r_off + dblp->r_size) + goto got_header; + + /* + * Seek to the header offset and read the header. 
Because the file + * may be pre-allocated, we have to make sure that we're not reading + * past the information in the start of the in-memory buffer. + */ + + readp = &hdr; + offset = nlsn.offset; + if (nlsn.file == lp->lsn.file && offset + sizeof(HDR) > lp->w_off) + nr = lp->w_off - offset; + else if (dblp->readbufp == NULL) + nr = sizeof(HDR); + else { + nr = lp->buffer_size; + readp = dblp->readbufp; + dblp->r_file = nlsn.file; + /* Going backwards. Put the current in the middle. */ + if (flags == DB_PREV || flags == DB_LAST) { + if (offset <= lp->buffer_size/2) + offset = 0; + else + offset = offset - lp->buffer_size/2; + } + if (nlsn.file == lp->lsn.file && offset + nr > lp->lsn.offset) + nr = lp->lsn.offset - offset; + dblp->r_off = offset; + } + + if ((ret = __os_seek(dblp->dbenv, + &dblp->c_fh, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) { + fail = "seek"; + goto err1; + } + if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, readp, nr, &nr)) != 0) { + fail = "read"; + goto err1; + } + if (nr < sizeof(HDR)) { + /* If read returns EOF, try the next file. */ + if (nr == 0) { + if (flags != DB_NEXT || nlsn.file == lp->lsn.file) + goto corrupt; + goto next_file; + } + + if (dblp->readbufp != NULL) + memcpy((u_int8_t *) &hdr, readp, nr); + + /* + * If read returns a short count the rest of the record has + * to be in the in-memory buffer. + */ + if (lp->b_off < sizeof(HDR) - nr) + goto corrupt; + + /* Get the rest of the header from the in-memory buffer. */ + memcpy((u_int8_t *)&hdr + nr, dblp->bufp, sizeof(HDR) - nr); + + if (hdr.len == 0) + goto next_file; + + shortp = dblp->bufp + (sizeof(HDR) - nr); + } + + else if (dblp->readbufp != NULL) { + dblp->r_size = nr; +got_header: memcpy((u_int8_t *)&hdr, + dblp->readbufp + (nlsn.offset - dblp->r_off), sizeof(HDR)); + } + + /* + * Check for buffers of 0's, that's what we usually see during recovery, + * although it's certainly not something on which we can depend. Check + * for impossibly large records. 
The malloc should fail later, but we + * have customers that run mallocs that handle allocation failure as a + * fatal error. + */ + if (hdr.len == 0) + goto next_file; + if (hdr.len <= sizeof(HDR) || hdr.len > lp->persist.lg_max) + goto corrupt; + len = hdr.len - sizeof(HDR); + + /* If we've already moved to the in-memory buffer, fill from there. */ + if (shortp != NULL) { + if (lp->b_off < ((u_int8_t *)shortp - dblp->bufp) + len) + goto corrupt; + if ((ret = __db_retcopy(NULL, dbt, shortp, len, + &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + goto cksum; + } + + if (dblp->readbufp != NULL) { + if (nlsn.offset + hdr.len < dblp->r_off + dblp->r_size) { + if ((ret = __db_retcopy(NULL, dbt, dblp->readbufp + + (nlsn.offset - dblp->r_off) + sizeof(HDR), + len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + goto cksum; + } else if ((ret = __os_seek(dblp->dbenv, &dblp->c_fh, 0, + 0, nlsn.offset + sizeof(HDR), 0, DB_OS_SEEK_SET)) != 0) { + fail = "seek"; + goto err1; + } + } + + /* + * Allocate temporary memory to hold the record. + * + * XXX + * We're calling malloc(3) with a region locked. This isn't + * a good idea. + */ + if ((ret = __os_malloc(dbenv, len, NULL, &tbuf)) != 0) + goto err1; + + /* + * Read the record into the buffer. If read returns a short count, + * there was an error or the rest of the record is in the in-memory + * buffer. Note, the information may be garbage if we're in recovery, + * so don't read past the end of the buffer's memory. + * + * Because the file may be pre-allocated, we have to make sure that + * we're not reading past the information in the start of the in-memory + * buffer. 
+ */ + if (nlsn.file == lp->lsn.file && + nlsn.offset + sizeof(HDR) + len > lp->w_off) + nr = lp->w_off - (nlsn.offset + sizeof(HDR)); + else + nr = len; + if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, tbuf, nr, &nr)) != 0) { + fail = "read"; + goto err1; + } + if (len - nr > lp->buffer_size) + goto corrupt; + if (nr != len) { + if (lp->b_off < len - nr) + goto corrupt; + + /* Get the rest of the record from the in-memory buffer. */ + memcpy((u_int8_t *)tbuf + nr, dblp->bufp, len - nr); + } + + /* Copy the record into the user's DBT. */ + if ((ret = __db_retcopy(NULL, dbt, tbuf, len, + &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + __os_free(tbuf, 0); + tbuf = NULL; + +cksum: /* + * If the user specified a partial record read, the checksum can't + * match. It's not an obvious thing to do, but a user testing for + * the length of a record might do it. + */ + if (!F_ISSET(dbt, DB_DBT_PARTIAL) && + hdr.cksum != __ham_func4(NULL, dbt->data, dbt->size)) { + if (!silent) + __db_err(dbenv, "log_get: checksum mismatch"); + goto corrupt; + } + + /* Update the cursor and the return lsn. */ + dblp->c_off = hdr.prev; + dblp->c_len = hdr.len; + dblp->c_lsn = nlsn; + *alsn = nlsn; + + return (0); + +corrupt:/* + * This is the catchall -- for some reason we didn't find enough + * information or it wasn't reasonable information, and it wasn't + * because a system call failed. + */ + ret = EIO; + fail = "read"; + +err1: if (!silent) { + if (fail == NULL) + __db_err(dbenv, "log_get: %s", db_strerror(ret)); + else + __db_err(dbenv, + "log_get: %s: %s", fail, db_strerror(ret)); + } + +err2: if (np != NULL) + __os_freestr(np); + if (tbuf != NULL) + __os_free(tbuf, 0); + return (ret); +} diff --git a/bdb/log/log_method.c b/bdb/log/log_method.c new file mode 100644 index 00000000000..883f485d891 --- /dev/null +++ b/bdb/log/log_method.c @@ -0,0 +1,121 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2000 + * Sleepycat Software. 
All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_method.c,v 11.14 2000/11/30 00:58:40 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "log.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __log_set_lg_max __P((DB_ENV *, u_int32_t)); +static int __log_set_lg_bsize __P((DB_ENV *, u_int32_t)); +static int __log_set_lg_dir __P((DB_ENV *, const char *)); + +/* + * __log_dbenv_create -- + * Log specific initialization of the DB_ENV structure. + * + * PUBLIC: void __log_dbenv_create __P((DB_ENV *)); + */ +void +__log_dbenv_create(dbenv) + DB_ENV *dbenv; +{ + dbenv->lg_bsize = LG_BSIZE_DEFAULT; + dbenv->set_lg_bsize = __log_set_lg_bsize; + + dbenv->lg_max = LG_MAX_DEFAULT; + dbenv->set_lg_max = __log_set_lg_max; + + dbenv->set_lg_dir = __log_set_lg_dir; +#ifdef HAVE_RPC + /* + * If we have a client, overwrite what we just setup to + * point to client functions. + */ + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) { + dbenv->set_lg_bsize = __dbcl_set_lg_bsize; + dbenv->set_lg_max = __dbcl_set_lg_max; + dbenv->set_lg_dir = __dbcl_set_lg_dir; + } +#endif +} + +/* + * __log_set_lg_bsize -- + * Set the log buffer size. + */ +static int +__log_set_lg_bsize(dbenv, lg_bsize) + DB_ENV *dbenv; + u_int32_t lg_bsize; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lg_bsize"); + + /* Let's not be silly. */ + if (lg_bsize > dbenv->lg_max / 4) { + __db_err(dbenv, "log buffer size must be <= log file size / 4"); + return (EINVAL); + } + + dbenv->lg_bsize = lg_bsize; + return (0); +} + +/* + * __log_set_lg_max -- + * Set the maximum log file size. 
+ */ +static int +__log_set_lg_max(dbenv, lg_max) + DB_ENV *dbenv; + u_int32_t lg_max; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lg_max"); + + /* Let's not be silly. */ + if (lg_max < dbenv->lg_bsize * 4) { + __db_err(dbenv, "log file size must be >= log buffer size * 4"); + return (EINVAL); + } + + dbenv->lg_max = lg_max; + return (0); +} + +/* + * __log_set_lg_dir -- + * Set the log file directory. + */ +static int +__log_set_lg_dir(dbenv, dir) + DB_ENV *dbenv; + const char *dir; +{ + if (dbenv->db_log_dir != NULL) + __os_freestr(dbenv->db_log_dir); + return (__os_strdup(dbenv, dir, &dbenv->db_log_dir)); +} diff --git a/bdb/log/log_put.c b/bdb/log/log_put.c new file mode 100644 index 00000000000..e5cdedb5493 --- /dev/null +++ b/bdb/log/log_put.c @@ -0,0 +1,701 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_put.c,v 11.26 2000/11/30 00:58:40 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#if TIME_WITH_SYS_TIME +#include <sys/time.h> +#include <time.h> +#else +#if HAVE_SYS_TIME_H +#include <sys/time.h> +#else +#include <time.h> +#endif +#endif + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_page.h" +#include "log.h" +#include "hash.h" +#include "clib_ext.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t)); +static int __log_flush __P((DB_LOG *, const DB_LSN *)); +static int __log_newfh __P((DB_LOG *)); +static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); +static int __log_open_files __P((DB_ENV *)); +static int __log_write __P((DB_LOG *, void *, u_int32_t)); + +/* + * log_put -- + * Write a 
log record.
 *
 *	Public entry point: validates the flags, then calls __log_put with
 *	the log region locked.  RPC client handles are forwarded to
 *	__dbcl_log_put instead.
 */
int
log_put(dbenv, lsn, dbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsn;
	const DBT *dbt;
	u_int32_t flags;
{
	DB_LOG *dblp;
	int ret;

#ifdef HAVE_RPC
	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
		return (__dbcl_log_put(dbenv, lsn, dbt, flags));
#endif

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);

	/* Validate arguments: exactly one of the known flag values, or 0. */
	if (flags != 0 && flags != DB_CHECKPOINT &&
	    flags != DB_CURLSN && flags != DB_FLUSH)
		return (__db_ferr(dbenv, "log_put", 0));

	dblp = dbenv->lg_handle;
	R_LOCK(dbenv, &dblp->reginfo);
	ret = __log_put(dbenv, lsn, dbt, flags);
	R_UNLOCK(dbenv, &dblp->reginfo);
	return (ret);
}

/*
 * __log_put --
 *	Write a log record; internal version.
 *
 *	No locking is done here; log_put above takes the region lock around
 *	this call.  On return *lsn holds the LSN of the record written (or,
 *	for DB_CURLSN, the current end-of-log LSN with nothing written).
 *	Returns 0 on success, EINVAL if the record cannot fit in a log file,
 *	or the error from a lower-level routine.
 *
 * PUBLIC: int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put(dbenv, lsn, dbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsn;
	const DBT *dbt;
	u_int32_t flags;
{
	DBT t;
	DB_LOG *dblp;
	LOG *lp;
	u_int32_t lastoff;
	int ret;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;

	/*
	 * If the application just wants to know where we are, fill in
	 * the information.  Currently used by the transaction manager
	 * to avoid writing TXN_begin records.
	 *
	 * dbt is not dereferenced on this path.
	 */
	if (flags == DB_CURLSN) {
		lsn->file = lp->lsn.file;
		lsn->offset = lp->lsn.offset;
		return (0);
	}

	/* If this information won't fit in the file, swap files. */
	if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
		if (sizeof(HDR) +
		    sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
			__db_err(dbenv,
			    "log_put: record larger than maximum file size");
			return (EINVAL);
		}

		/* Flush the log. */
		if ((ret = __log_flush(dblp, NULL)) != 0)
			return (ret);

		/*
		 * Save the last known offset from the previous file, we'll
		 * need it to initialize the persistent header information.
		 */
		lastoff = lp->lsn.offset;

		/* Point the current LSN to the new file. */
		++lp->lsn.file;
		lp->lsn.offset = 0;

		/* Reset the file write offset. */
		lp->w_off = 0;
	} else
		lastoff = 0;

	/* Initialize the LSN information returned to the user. */
	lsn->file = lp->lsn.file;
	lsn->offset = lp->lsn.offset;

	/*
	 * Insert persistent information as the first record in every file.
	 * Note that the previous length is wrong for the very first record
	 * of the log, but that's okay, we check for it during retrieval.
	 */
	if (lp->lsn.offset == 0) {
		t.data = &lp->persist;
		t.size = sizeof(LOGP);
		/*
		 * lastoff is non-zero only if we just switched files; in that
		 * case the "previous record" offset refers to the old file.
		 */
		if ((ret = __log_putr(dblp, lsn,
		    &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
			return (ret);

		/*
		 * Record files open in this log.
		 * If we are recovering then we are in the
		 * process of outputting the files, don't do
		 * it again.
		 */
		if (!F_ISSET(dblp, DBLOG_RECOVER) &&
		    (ret = __log_open_files(dbenv)) != 0)
			return (ret);

		/* Update the LSN information returned to the user. */
		lsn->file = lp->lsn.file;
		lsn->offset = lp->lsn.offset;
	}

	/* Write the application's log record. */
	if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
		return (ret);

	/*
	 * On a checkpoint, we:
	 *	Put out the checkpoint record (above).
	 *	Save the LSN of the checkpoint in the shared region.
	 *	Append the set of file name information into the log.
	 */
	if (flags == DB_CHECKPOINT) {
		lp->chkpt_lsn = *lsn;
		if ((ret = __log_open_files(dbenv)) != 0)
			return (ret);
	}

	/*
	 * On a checkpoint or when flush is requested, we:
	 *	Flush the current buffer contents to disk.
	 *	Sync the log to disk.
	 */
	if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
		if ((ret = __log_flush(dblp, NULL)) != 0)
			return (ret);

	/*
	 * On a checkpoint, we:
	 *	Save the time the checkpoint was written.
	 *	Reset the bytes written since the last checkpoint.
	 */
	if (flags == DB_CHECKPOINT) {
		(void)time(&lp->chkpt);
		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
	}
	return (0);
}

/*
 * __log_putr --
 *	Actually put a record into the log.
 *
 *	Builds a HDR (previous-record offset, total length, checksum of the
 *	payload) and appends the header followed by the payload through
 *	__log_fill, advancing lp->lsn.offset and setting lp->len to the
 *	total record length as it goes.
 */
static int
__log_putr(dblp, lsn, dbt, prev)
	DB_LOG *dblp;
	DB_LSN *lsn;
	const DBT *dbt;
	u_int32_t prev;
{
	HDR hdr;
	LOG *lp;
	int ret;

	lp = dblp->reginfo.primary;

	/*
	 * Initialize the header.  If we just switched files, lsn.offset will
	 * be 0, and what we really want is the offset of the previous record
	 * in the previous file.  Fortunately, prev holds the value we want.
	 */
	hdr.prev = prev;
	hdr.len = sizeof(HDR) + dbt->size;
	hdr.cksum = __ham_func4(NULL, dbt->data, dbt->size);

	if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
		return (ret);
	lp->len = sizeof(HDR);
	lp->lsn.offset += sizeof(HDR);

	if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
		return (ret);
	lp->len += dbt->size;
	lp->lsn.offset += dbt->size;
	return (0);
}

/*
 * log_flush --
 *	Write all records less than or equal to the specified LSN.
 *
 *	Public entry point: calls __log_flush with the log region locked.
 *	RPC client handles are forwarded to __dbcl_log_flush instead.
 */
int
log_flush(dbenv, lsn)
	DB_ENV *dbenv;
	const DB_LSN *lsn;
{
	DB_LOG *dblp;
	int ret;

#ifdef HAVE_RPC
	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
		return (__dbcl_log_flush(dbenv, lsn));
#endif

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG);

	dblp = dbenv->lg_handle;
	R_LOCK(dbenv, &dblp->reginfo);
	ret = __log_flush(dblp, lsn);
	R_UNLOCK(dbenv, &dblp->reginfo);
	return (ret);
}

/*
 * __log_flush --
 *	Write all records less than or equal to the specified LSN; internal
 *	version.
 *
 *	A NULL lsn means "flush everything written so far".  Returns 0 on
 *	success (including when nothing needed to be done), EINVAL if the
 *	LSN is past the end of the log, the __log_write/__log_newfh error,
 *	or the result of __db_panic if the fsync fails.
 */
static int
__log_flush(dblp, lsn)
	DB_LOG *dblp;
	const DB_LSN *lsn;
{
	DB_LSN t_lsn;
	LOG *lp;
	int current, ret;

	ret = 0;
	lp = dblp->reginfo.primary;

	/*
	 * If no LSN specified, flush the entire log by setting the flush LSN
	 * to the last LSN written in the log.  Otherwise, check that the LSN
	 * isn't a non-existent record for the log.
	 */
	if (lsn == NULL) {
		t_lsn.file = lp->lsn.file;
		t_lsn.offset = lp->lsn.offset - lp->len;
		lsn = &t_lsn;
	} else
		if (lsn->file > lp->lsn.file ||
		    (lsn->file == lp->lsn.file &&
		    lsn->offset > lp->lsn.offset - lp->len)) {
			__db_err(dblp->dbenv,
			    "log_flush: LSN past current end-of-log");
			return (EINVAL);
		}

	/*
	 * If the LSN is less than or equal to the last-sync'd LSN, we're done.
	 * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte
	 * after the byte we absolutely know was written to disk, so the test
	 * is <, not <=.
	 */
	if (lsn->file < lp->s_lsn.file ||
	    (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
		return (0);

	/*
	 * We may need to write the current buffer.  We have to write the
	 * current buffer if the flush LSN is greater than or equal to the
	 * buffer's starting LSN.
	 */
	current = 0;
	if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) {
		if ((ret = __log_write(dblp, dblp->bufp, lp->b_off)) != 0)
			return (ret);

		lp->b_off = 0;
		current = 1;
	}

	/*
	 * It's possible that this thread may never have written to this log
	 * file.  Acquire a file descriptor if we don't already have one.
	 * One last check -- if we're not writing anything from the current
	 * buffer, don't bother.  We have nothing to write and nothing to
	 * sync.
	 */
	if (dblp->lfname != lp->lsn.file) {
		if (!current)
			return (0);
		if ((ret = __log_newfh(dblp)) != 0)
			return (ret);
	}

	/* Sync all writes to disk. */
	if ((ret = __os_fsync(dblp->dbenv, &dblp->lfh)) != 0)
		return (__db_panic(dblp->dbenv, ret));
	++lp->stat.st_scount;

	/* Set the last-synced LSN, using the on-disk write offset. */
	lp->s_lsn.file = lp->f_lsn.file;
	lp->s_lsn.offset = lp->w_off;

	return (0);
}

/*
 * __log_fill --
 *	Write information into the log.
+ */ +static int +__log_fill(dblp, lsn, addr, len) + DB_LOG *dblp; + DB_LSN *lsn; + void *addr; + u_int32_t len; +{ + LOG *lp; + u_int32_t bsize, nrec; + size_t nw, remain; + int ret; + + lp = dblp->reginfo.primary; + bsize = lp->buffer_size; + + while (len > 0) { /* Copy out the data. */ + /* + * If we're beginning a new buffer, note the user LSN to which + * the first byte of the buffer belongs. We have to know this + * when flushing the buffer so that we know if the in-memory + * buffer needs to be flushed. + */ + if (lp->b_off == 0) + lp->f_lsn = *lsn; + + /* + * If we're on a buffer boundary and the data is big enough, + * copy as many records as we can directly from the data. + */ + if (lp->b_off == 0 && len >= bsize) { + nrec = len / bsize; + if ((ret = __log_write(dblp, addr, nrec * bsize)) != 0) + return (ret); + addr = (u_int8_t *)addr + nrec * bsize; + len -= nrec * bsize; + ++lp->stat.st_wcount_fill; + continue; + } + + /* Figure out how many bytes we can copy this time. */ + remain = bsize - lp->b_off; + nw = remain > len ? len : remain; + memcpy(dblp->bufp + lp->b_off, addr, nw); + addr = (u_int8_t *)addr + nw; + len -= nw; + lp->b_off += nw; + + /* If we fill the buffer, flush it. */ + if (lp->b_off == bsize) { + if ((ret = __log_write(dblp, dblp->bufp, bsize)) != 0) + return (ret); + lp->b_off = 0; + ++lp->stat.st_wcount_fill; + } + } + return (0); +} + +/* + * __log_write -- + * Write the log buffer to disk. + */ +static int +__log_write(dblp, addr, len) + DB_LOG *dblp; + void *addr; + u_int32_t len; +{ + LOG *lp; + size_t nw; + int ret; + + /* + * If we haven't opened the log file yet or the current one + * has changed, acquire a new log file. + */ + lp = dblp->reginfo.primary; + if (!F_ISSET(&dblp->lfh, DB_FH_VALID) || dblp->lfname != lp->lsn.file) + if ((ret = __log_newfh(dblp)) != 0) + return (ret); + + /* + * Seek to the offset in the file (someone may have written it + * since we last did). 
+ */ + if ((ret = + __os_seek(dblp->dbenv, + &dblp->lfh, 0, 0, lp->w_off, 0, DB_OS_SEEK_SET)) != 0 || + (ret = __os_write(dblp->dbenv, &dblp->lfh, addr, len, &nw)) != 0) + return (__db_panic(dblp->dbenv, ret)); + if (nw != len) { + __db_err(dblp->dbenv, "Short write while writing log"); + return (EIO); + } + + /* Reset the buffer offset and update the seek offset. */ + lp->w_off += len; + + /* Update written statistics. */ + if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { + lp->stat.st_w_bytes -= MEGABYTE; + ++lp->stat.st_w_mbytes; + } + if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) { + lp->stat.st_wc_bytes -= MEGABYTE; + ++lp->stat.st_wc_mbytes; + } + ++lp->stat.st_wcount; + + return (0); +} + +/* + * log_file -- + * Map a DB_LSN to a file name. + */ +int +log_file(dbenv, lsn, namep, len) + DB_ENV *dbenv; + const DB_LSN *lsn; + char *namep; + size_t len; +{ + DB_LOG *dblp; + int ret; + char *name; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_file(dbenv, lsn, namep, len)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + dblp = dbenv->lg_handle; + R_LOCK(dbenv, &dblp->reginfo); + ret = __log_name(dblp, lsn->file, &name, NULL, 0); + R_UNLOCK(dbenv, &dblp->reginfo); + if (ret != 0) + return (ret); + + /* Check to make sure there's enough room and copy the name. */ + if (len < strlen(name) + 1) { + *namep = '\0'; + __db_err(dbenv, "log_file: name buffer is too short"); + return (EINVAL); + } + (void)strcpy(namep, name); + __os_freestr(name); + + return (0); +} + +/* + * __log_newfh -- + * Acquire a file handle for the current log file. + */ +static int +__log_newfh(dblp) + DB_LOG *dblp; +{ + LOG *lp; + int ret; + char *name; + + /* Close any previous file descriptor. */ + if (F_ISSET(&dblp->lfh, DB_FH_VALID)) + (void)__os_closehandle(&dblp->lfh); + + /* Get the path of the new file and open it. 
*/ + lp = dblp->reginfo.primary; + dblp->lfname = lp->lsn.file; + + /* + * Adding DB_OSO_LOG to the flags may add additional platform-specific + * optimizations. On WinNT, the logfile is preallocated, which may + * have a time penalty at startup, but have better overall throughput. + * We are not certain that this works reliably, so enable at your own + * risk. + * + * XXX: + * Initialize the log file size. This is a hack to push the log's + * maximum size down into the Windows __os_open routine, because it + * wants to pre-allocate it. + */ + dblp->lfh.log_size = dblp->dbenv->lg_max; + if ((ret = __log_name(dblp, dblp->lfname, + &name, &dblp->lfh, + DB_OSO_CREATE |/* DB_OSO_LOG |*/ DB_OSO_SEQ)) != 0) + __db_err(dblp->dbenv, + "log_put: %s: %s", name, db_strerror(ret)); + + __os_freestr(name); + return (ret); +} + +/* + * __log_name -- + * Return the log name for a particular file, and optionally open it. + * + * PUBLIC: int __log_name __P((DB_LOG *, + * PUBLIC: u_int32_t, char **, DB_FH *, u_int32_t)); + */ +int +__log_name(dblp, filenumber, namep, fhp, flags) + DB_LOG *dblp; + u_int32_t filenumber, flags; + char **namep; + DB_FH *fhp; +{ + LOG *lp; + int ret; + char *oname; + char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20]; + + lp = dblp->reginfo.primary; + + /* + * !!! + * The semantics of this routine are bizarre. + * + * The reason for all of this is that we need a place where we can + * intercept requests for log files, and, if appropriate, check for + * both the old-style and new-style log file names. The trick is + * that all callers of this routine that are opening the log file + * read-only want to use an old-style file name if they can't find + * a match using a new-style name. The only down-side is that some + * callers may check for the old-style when they really don't need + * to, but that shouldn't mess up anything, and we only check for + * the old-style name when we've already failed to find a new-style + * one. 
+ * + * Create a new-style file name, and if we're not going to open the + * file, return regardless. + */ + (void)snprintf(new, sizeof(new), LFNAME, filenumber); + if ((ret = __db_appname(dblp->dbenv, + DB_APP_LOG, NULL, new, 0, NULL, namep)) != 0 || fhp == NULL) + return (ret); + + /* Open the new-style file -- if we succeed, we're done. */ + if ((ret = __os_open(dblp->dbenv, + *namep, flags, lp->persist.mode, fhp)) == 0) + return (0); + + /* + * The open failed... if the DB_RDONLY flag isn't set, we're done, + * the caller isn't interested in old-style files. + */ + if (!LF_ISSET(DB_OSO_RDONLY)) { + __db_err(dblp->dbenv, + "%s: log file open failed: %s", *namep, db_strerror(ret)); + return (__db_panic(dblp->dbenv, ret)); + } + + /* Create an old-style file name. */ + (void)snprintf(old, sizeof(old), LFNAME_V1, filenumber); + if ((ret = __db_appname(dblp->dbenv, + DB_APP_LOG, NULL, old, 0, NULL, &oname)) != 0) + goto err; + + /* + * Open the old-style file -- if we succeed, we're done. Free the + * space allocated for the new-style name and return the old-style + * name to the caller. + */ + if ((ret = __os_open(dblp->dbenv, + oname, flags, lp->persist.mode, fhp)) == 0) { + __os_freestr(*namep); + *namep = oname; + return (0); + } + + /* + * Couldn't find either style of name -- return the new-style name + * for the caller's error message. If it's an old-style name that's + * actually missing we're going to confuse the user with the error + * message, but that implies that not only were we looking for an + * old-style name, but we expected it to exist and we weren't just + * looking for any log file. That's not a likely error. 
+ */ +err: __os_freestr(oname); + return (ret); +} + +static int +__log_open_files(dbenv) + DB_ENV *dbenv; +{ + DB_LOG *dblp; + DB_LSN r_unused; + DBT fid_dbt, t; + FNAME *fnp; + LOG *lp; + int ret; + + dblp = dbenv->lg_handle; + lp = dblp->reginfo.primary; + + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { + if (fnp->ref == 0) /* Entry not in use. */ + continue; + if (fnp->name_off != INVALID_ROFF) { + memset(&t, 0, sizeof(t)); + t.data = R_ADDR(&dblp->reginfo, fnp->name_off); + t.size = strlen(t.data) + 1; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + /* + * Output LOG_CHECKPOINT records which will be + * processed during the OPENFILES pass of recovery. + * At the end of recovery we want to output the + * files that were open so that a future recovery + * run will have the correct files open during + * a backward pass. For this we output LOG_CLOSE + * records so that the files will be closed on + * the forward pass. + */ + if ((ret = __log_register_log(dbenv, + NULL, &r_unused, 0, + F_ISSET(dblp, DBLOG_RECOVER) ? LOG_CLOSE : LOG_CHECKPOINT, + fnp->name_off == INVALID_ROFF ? NULL : &t, + &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno)) != 0) + return (ret); + } + return (0); +} diff --git a/bdb/log/log_rec.c b/bdb/log/log_rec.c new file mode 100644 index 00000000000..a871848295e --- /dev/null +++ b/bdb/log/log_rec.c @@ -0,0 +1,621 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_rec.c,v 11.48 2001/01/11 18:19:53 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_am.h" +#include "log.h" + +static int __log_do_open __P((DB_ENV *, DB_LOG *, + u_int8_t *, char *, DBTYPE, int32_t, db_pgno_t)); +static int __log_open_file __P((DB_ENV *, DB_LOG *, __log_register_args *)); + +/* + * PUBLIC: int __log_register_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__log_register_recover(dbenv, dbtp, lsnp, op, info) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + DB_ENTRY *dbe; + DB_LOG *logp; + DB *dbp; + __log_register_args *argp; + int do_rem, ret, t_ret; + + logp = dbenv->lg_handle; + dbp = NULL; + +#ifdef DEBUG_RECOVER + REC_PRINT(__log_register_print); +#endif + COMPQUIET(lsnp, NULL); + + if ((ret = __log_register_read(dbenv, dbtp->data, &argp)) != 0) + goto out; + + if ((argp->opcode == LOG_OPEN && + (DB_REDO(op) || op == DB_TXN_OPENFILES)) || + (argp->opcode == LOG_CLOSE && DB_UNDO(op))) { + /* + * If we are redoing an open or undoing a close, then we need + * to open a file. We must open the file even if + * the meta page is not yet written as we may be creating it. + */ + if (op == DB_TXN_OPENFILES) + F_SET(logp, DBLOG_FORCE_OPEN); + ret = __log_open_file(dbenv, logp, argp); + F_CLR(logp, DBLOG_FORCE_OPEN); + if (ret == ENOENT || ret == EINVAL) { + if (op == DB_TXN_OPENFILES && argp->name.size != 0 && + (ret = __db_txnlist_delete(dbenv, info, + argp->name.data, argp->fileid, 0)) != 0) + goto out; + ret = 0; + } + } else if (argp->opcode != LOG_CHECKPOINT) { + /* + * If we are undoing an open, then we need to close the file. + * + * If the file is deleted, then we can just ignore this close. 
+ * Otherwise, we should usually have a valid dbp we should + * close or whose reference count should be decremented. + * However, if we shut down without closing a file, we may, in + * fact, not have the file open, and that's OK. + */ + do_rem = 0; + MUTEX_THREAD_LOCK(dbenv, logp->mutexp); + if (argp->fileid < logp->dbentry_cnt) { + dbe = &logp->dbentry[argp->fileid]; + + DB_ASSERT(dbe->refcount == 1); + + ret = __db_txnlist_close(info, + argp->fileid, dbe->count); + if ((dbp = TAILQ_FIRST(&dbe->dblist)) != NULL) + (void)log_unregister(dbenv, dbp); + do_rem = 1; + } + MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp); + if (do_rem) { + (void)__log_rem_logid(logp, dbp, argp->fileid); + /* + * If remove or rename has closed the file, don't + * sync. + */ + if (dbp != NULL && + (t_ret = dbp->close(dbp, + dbp->mpf == NULL ? DB_NOSYNC : 0)) != 0 && ret == 0) + ret = t_ret; + } + } else if (DB_UNDO(op) || op == DB_TXN_OPENFILES) { + /* + * It's a checkpoint and we are rolling backward. It + * is possible that the system was shut down and thus + * ended with a stable checkpoint; this file was never + * closed and has therefore not been reopened yet. If + * so, we need to try to open it. + */ + ret = __log_open_file(dbenv, logp, argp); + if (ret == ENOENT || ret == EINVAL) { + if (argp->name.size != 0 && (ret = + __db_txnlist_delete(dbenv, info, + argp->name.data, argp->fileid, 0)) != 0) + goto out; + ret = 0; + } + } + +out: if (argp != NULL) + __os_free(argp, 0); + return (ret); +} + +/* + * __log_open_file -- + * Called during log_register recovery. Make sure that we have an + * entry in the dbentry table for this ndx. Returns 0 on success, + * non-zero on error. + */ +static int +__log_open_file(dbenv, lp, argp) + DB_ENV *dbenv; + DB_LOG *lp; + __log_register_args *argp; +{ + DB_ENTRY *dbe; + DB *dbp; + + /* + * We never re-open temporary files. Temp files are only + * useful during aborts in which case the dbp was entered + * when the file was registered. 
During recovery, we treat + * temp files as properly deleted files, allowing the open to + * fail and not reporting any errors when recovery fails to + * get a valid dbp from db_fileid_to_db. + */ + if (argp->name.size == 0) { + (void)__log_add_logid(dbenv, lp, NULL, argp->fileid); + return (ENOENT); + } + + /* + * Because of reference counting, we cannot automatically close files + * during recovery, so when we're opening, we have to check that the + * name we are opening is what we expect. If it's not, then we close + * the old file and open the new one. + */ + MUTEX_THREAD_LOCK(dbenv, lp->mutexp); + if (argp->fileid < lp->dbentry_cnt) + dbe = &lp->dbentry[argp->fileid]; + else + dbe = NULL; + + if (dbe != NULL) { + dbe->deleted = 0; + if ((dbp = TAILQ_FIRST(&dbe->dblist)) != NULL) { + if (dbp->meta_pgno != argp->meta_pgno || + memcmp(dbp->fileid, + argp->uid.data, DB_FILE_ID_LEN) != 0) { + MUTEX_THREAD_UNLOCK(dbenv, lp->mutexp); + goto reopen; + } + if (!F_ISSET(lp, DBLOG_RECOVER)) + dbe->refcount++; + MUTEX_THREAD_UNLOCK(dbenv, lp->mutexp); + return (0); + } + } + + MUTEX_THREAD_UNLOCK(dbenv, lp->mutexp); + if (0) { +reopen: (void)log_unregister(dbp->dbenv, dbp); + (void)__log_rem_logid(lp, dbp, argp->fileid); + dbp->close(dbp, 0); + } + + return (__log_do_open(dbenv, lp, + argp->uid.data, argp->name.data, + argp->ftype, argp->fileid, argp->meta_pgno)); +} + +/* + * log_reopen_file -- close and reopen a db file. + * Must be called when a metadata page changes. 
+ * + * PUBLIC: int __log_reopen_file __P((DB_ENV *, + * PUBLIC: char *, int32_t, u_int8_t *, db_pgno_t)); + * + */ +int +__log_reopen_file(dbenv, name, ndx, fileid, meta_pgno) + DB_ENV *dbenv; + char *name; + int32_t ndx; + u_int8_t *fileid; + db_pgno_t meta_pgno; +{ + DB *dbp; + DB_LOG *logp; + DBTYPE ftype; + FNAME *fnp; + LOG *lp; + char *tmp_name; + int ret; + + logp = dbenv->lg_handle; + + if (name == NULL) { + R_LOCK(dbenv, &logp->reginfo); + + lp = logp->reginfo.primary; + + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { + if (fnp->ref == 0) /* Entry not in use. */ + continue; + if (memcmp(fnp->ufid, fileid, DB_FILE_ID_LEN) == 0) + break; + } + + if (fnp == 0 || fnp->name_off == INVALID_ROFF) { + __db_err(dbenv, + "metasub recover: non-existent file id"); + return (EINVAL); + } + + name = R_ADDR(&logp->reginfo, fnp->name_off); + ret = __os_strdup(dbenv, name, &tmp_name); + R_UNLOCK(dbenv, &logp->reginfo); + if (ret != 0) + goto out; + name = tmp_name; + } else + tmp_name = NULL; + + if ((ret = __db_fileid_to_db(dbenv, &dbp, ndx, 0)) != 0) + goto out; + ftype = dbp->type; + (void)log_unregister(dbenv, dbp); + (void)__log_rem_logid(logp, dbp, ndx); + (void)dbp->close(dbp, 0); + + ret = __log_do_open(dbenv, logp, fileid, name, ftype, ndx, meta_pgno); + + if (tmp_name != NULL) + __os_free(tmp_name, 0); + +out: return (ret); +} + +/* + * __log_do_open -- + * Open files referenced in the log. This is the part of the open that + * is not protected by the thread mutex. 
+ */ +static int +__log_do_open(dbenv, lp, uid, name, ftype, ndx, meta_pgno) + DB_ENV *dbenv; + DB_LOG *lp; + u_int8_t *uid; + char *name; + DBTYPE ftype; + int32_t ndx; + db_pgno_t meta_pgno; +{ + DB *dbp; + int ret; + u_int8_t zeroid[DB_FILE_ID_LEN]; + + if ((ret = db_create(&dbp, lp->dbenv, 0)) != 0) + return (ret); + + dbp->log_fileid = ndx; + + /* + * This is needed to signal to the locking routines called while + * opening databases that we are potentially undoing a transaction + * from an XA process. Since the XA process does not share + * locks with the aborting transaction this prevents us from + * deadlocking during the open during rollback. + * Because this routine is called either during recovery or during an + * XA_ABORT, we can safely set DB_AM_RECOVER in the dbp since it + * will not be shared with other threads. + */ + F_SET(dbp, DB_AM_RECOVER); + if (meta_pgno != PGNO_BASE_MD) + memcpy(dbp->fileid, uid, DB_FILE_ID_LEN); + dbp->type = ftype; + if ((ret = + __db_dbopen(dbp, name, 0, __db_omode("rw----"), meta_pgno)) == 0) { + /* + * Verify that we are opening the same file that we were + * referring to when we wrote this log record. + */ + if (memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) { + memset(zeroid, 0, DB_FILE_ID_LEN); + if (memcmp(dbp->fileid, zeroid, DB_FILE_ID_LEN) != 0) + goto not_right; + memcpy(dbp->fileid, uid, DB_FILE_ID_LEN); + } + if (IS_RECOVERING(dbenv)) { + (void)log_register(dbp->dbenv, dbp, name); + (void)__log_add_logid(dbenv, lp, dbp, ndx); + } + return (0); + } + +not_right: + (void)dbp->close(dbp, 0); + (void)__log_add_logid(dbenv, lp, NULL, ndx); + + return (ENOENT); +} + +/* + * __log_add_logid -- + * Adds a DB entry to the log's DB entry table. 
+ * + * PUBLIC: int __log_add_logid __P((DB_ENV *, DB_LOG *, DB *, int32_t)); + */ +int +__log_add_logid(dbenv, logp, dbp, ndx) + DB_ENV *dbenv; + DB_LOG *logp; + DB *dbp; + int32_t ndx; +{ + DB *dbtmp; + int32_t i; + int ret; + + ret = 0; + + MUTEX_THREAD_LOCK(dbenv, logp->mutexp); + + /* + * Check if we need to grow the table. Note, ndx is 0-based (the + * index into the DB entry table) an dbentry_cnt is 1-based, the + * number of available slots. + */ + if (logp->dbentry_cnt <= ndx) { + if ((ret = __os_realloc(dbenv, + (ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY), + NULL, &logp->dbentry)) != 0) + goto err; + + /* + * We have moved the head of the queue. + * Fix up the queue header of an empty queue or the previous + * pointer of the first element. + */ + for (i = 0; i < logp->dbentry_cnt; i++) { + if ((dbtmp = + TAILQ_FIRST(&logp->dbentry[i].dblist)) == NULL) + TAILQ_INIT(&logp->dbentry[i].dblist); + else + TAILQ_REINSERT_HEAD( + &logp->dbentry[i].dblist, dbp, links); + } + + /* Initialize the new entries. */ + for (i = logp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) { + logp->dbentry[i].count = 0; + TAILQ_INIT(&logp->dbentry[i].dblist); + logp->dbentry[i].deleted = 0; + logp->dbentry[i].refcount = 0; + } + + logp->dbentry_cnt = i; + } + + if (logp->dbentry[ndx].deleted == 0 && + TAILQ_FIRST(&logp->dbentry[ndx].dblist) == NULL) { + logp->dbentry[ndx].count = 0; + if (dbp != NULL) + TAILQ_INSERT_HEAD(&logp->dbentry[ndx].dblist, + dbp, links); + logp->dbentry[ndx].deleted = dbp == NULL; + logp->dbentry[ndx].refcount = 1; + } else if (!F_ISSET(logp, DBLOG_RECOVER)) { + if (dbp != NULL) + TAILQ_INSERT_HEAD(&logp->dbentry[ndx].dblist, + dbp, links); + logp->dbentry[ndx].refcount++; + } + +err: MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp); + return (ret); +} + +/* + * __db_fileid_to_db -- + * Return the DB corresponding to the specified fileid. 
+ * + * PUBLIC: int __db_fileid_to_db __P((DB_ENV *, DB **, int32_t, int)); + */ +int +__db_fileid_to_db(dbenv, dbpp, ndx, inc) + DB_ENV *dbenv; + DB **dbpp; + int32_t ndx; + int inc; +{ + DB_LOG *logp; + DB *dbp; + FNAME *fname; + int ret; + char *name; + + ret = 0; + logp = dbenv->lg_handle; + + MUTEX_THREAD_LOCK(dbenv, logp->mutexp); + + /* + * Under XA, a process different than the one issuing DB operations + * may abort a transaction. In this case, recovery routines are run + * by a process that does not necessarily have the file open, so we + * we must open the file explicitly. + */ + if (ndx >= logp->dbentry_cnt || + (!logp->dbentry[ndx].deleted && + (dbp = TAILQ_FIRST(&logp->dbentry[ndx].dblist)) == NULL)) { + if (F_ISSET(logp, DBLOG_RECOVER)) { + ret = ENOENT; + goto err; + } + if (__log_lid_to_fname(logp, ndx, &fname) != 0) { + /* Couldn't find entry; this is a fatal error. */ + __db_err(dbenv, "Missing log fileid entry"); + ret = EINVAL; + goto err; + } + name = R_ADDR(&logp->reginfo, fname->name_off); + + /* + * __log_do_open is called without protection of the + * log thread lock. + */ + MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp); + + /* + * At this point, we are not holding the thread lock, so exit + * directly instead of going through the exit code at the + * bottom. If the __log_do_open succeeded, then we don't need + * to do any of the remaining error checking at the end of this + * routine. + */ + if ((ret = __log_do_open(dbenv, logp, + fname->ufid, name, fname->s_type, + ndx, fname->meta_pgno)) != 0) + return (ret); + + *dbpp = TAILQ_FIRST(&logp->dbentry[ndx].dblist); + return (0); + } + + /* + * Return DB_DELETED if the file has been deleted (it's not an error). + */ + if (logp->dbentry[ndx].deleted) { + ret = DB_DELETED; + if (inc) + logp->dbentry[ndx].count++; + goto err; + } + + /* + * Otherwise return 0, but if we don't have a corresponding DB, it's + * an error. 
+ */ + if ((*dbpp = TAILQ_FIRST(&logp->dbentry[ndx].dblist)) == NULL) + ret = ENOENT; + +err: MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp); + return (ret); +} + +/* + * __log_close_files -- + * Close files that were opened by the recovery daemon. We sync the + * file, unless its mpf pointer has been NULLed by a db_remove or + * db_rename. We may not have flushed the log_register record that + * closes the file. + * + * PUBLIC: void __log_close_files __P((DB_ENV *)); + */ +void +__log_close_files(dbenv) + DB_ENV *dbenv; +{ + DB_ENTRY *dbe; + DB_LOG *logp; + DB *dbp; + int32_t i; + + logp = dbenv->lg_handle; + MUTEX_THREAD_LOCK(dbenv, logp->mutexp); + for (i = 0; i < logp->dbentry_cnt; i++) { + dbe = &logp->dbentry[i]; + while ((dbp = TAILQ_FIRST(&dbe->dblist)) != NULL) { + (void)log_unregister(dbenv, dbp); + TAILQ_REMOVE(&dbe->dblist, dbp, links); + (void)dbp->close(dbp, dbp->mpf == NULL ? DB_NOSYNC : 0); + } + dbe->deleted = 0; + dbe->refcount = 0; + } + MUTEX_THREAD_UNLOCK(dbenv, logp->mutexp); +} + +/* + * __log_rem_logid + * Remove an entry from the log table. Find the appropriate DB and + * unlink it from the linked list off the table. If the DB is NULL, treat + * this as a simple refcount decrement. + * + * PUBLIC: void __log_rem_logid __P((DB_LOG *, DB *, int32_t)); + */ +void +__log_rem_logid(logp, dbp, ndx) + DB_LOG *logp; + DB *dbp; + int32_t ndx; +{ + DB *xdbp; + + MUTEX_THREAD_LOCK(logp->dbenv, logp->mutexp); + if (--logp->dbentry[ndx].refcount == 0) { + TAILQ_INIT(&logp->dbentry[ndx].dblist); + logp->dbentry[ndx].deleted = 0; + } else if (dbp != NULL) + for (xdbp = TAILQ_FIRST(&logp->dbentry[ndx].dblist); + xdbp != NULL; + xdbp = TAILQ_NEXT(xdbp, links)) + if (xdbp == dbp) { + TAILQ_REMOVE(&logp->dbentry[ndx].dblist, + xdbp, links); + break; + } + + MUTEX_THREAD_UNLOCK(logp->dbenv, logp->mutexp); +} + +/* + * __log_lid_to_fname -- + * Traverse the shared-memory region looking for the entry that + * matches the passed log fileid. 
Returns 0 on success; -1 on error. + * PUBLIC: int __log_lid_to_fname __P((DB_LOG *, int32_t, FNAME **)); + */ +int +__log_lid_to_fname(dblp, lid, fnamep) + DB_LOG *dblp; + int32_t lid; + FNAME **fnamep; +{ + FNAME *fnp; + LOG *lp; + + lp = dblp->reginfo.primary; + + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { + if (fnp->ref == 0) /* Entry not in use. */ + continue; + if (fnp->id == lid) { + *fnamep = fnp; + return (0); + } + } + return (-1); +} diff --git a/bdb/log/log_register.c b/bdb/log/log_register.c new file mode 100644 index 00000000000..1e0e523d8b9 --- /dev/null +++ b/bdb/log/log_register.c @@ -0,0 +1,433 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_register.c,v 11.35 2001/01/10 16:04:19 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "log.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +/* + * log_register -- + * Register a file name. + */ +int +log_register(dbenv, dbp, name) + DB_ENV *dbenv; + DB *dbp; + const char *name; +{ + DBT fid_dbt, r_name; + DB_LOG *dblp; + DB_LSN r_unused; + FNAME *found_fnp, *fnp, *recover_fnp, *reuse_fnp; + LOG *lp; + size_t len; + int32_t maxid; + int inserted, ok, ret; + void *namep; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_register(dbenv, dbp, name)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + dblp = dbenv->lg_handle; + lp = dblp->reginfo.primary; + fnp = reuse_fnp = NULL; + inserted = ret = 0; + namep = NULL; + + /* Check the arguments. 
*/ + if (dbp->type != DB_BTREE && dbp->type != DB_QUEUE && + dbp->type != DB_HASH && dbp->type != DB_RECNO) { + __db_err(dbenv, "log_register: unknown DB file type"); + return (EINVAL); + } + + R_LOCK(dbenv, &dblp->reginfo); + + /* + * See if we've already got this file in the log, finding the + * (maximum+1) in-use file id and some available file id (if we + * find an available fid, we'll use it, else we'll have to allocate + * one after the maximum that we found). + */ + ok = 0; + found_fnp = recover_fnp = NULL; + for (maxid = 0, fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { + if (F_ISSET(dblp, DBLOG_RECOVER) && fnp->id == dbp->log_fileid) + recover_fnp = fnp; + if (fnp->ref == 0) { /* Entry is not in use. */ + if (reuse_fnp == NULL) + reuse_fnp = fnp; + continue; + } + if (memcmp(dbp->fileid, fnp->ufid, DB_FILE_ID_LEN) == 0) { + if (fnp->meta_pgno == 0) { + if (fnp->locked == 1) { + __db_err(dbenv, "File is locked"); + return (EINVAL); + } + if (found_fnp != NULL) { + fnp = found_fnp; + goto found; + } + ok = 1; + } + if (dbp->meta_pgno == fnp->meta_pgno) { + if (F_ISSET(dblp, DBLOG_RECOVER)) { + if (fnp->id != dbp->log_fileid) { + /* + * If we are in recovery, there + * is only one dbp on the list. + * If the refcount goes to 0, + * we will clear the list. If + * it doesn't, we want to leave + * the dbp where it is, so + * passing a NULL to rem_logid + * is correct. + */ + __log_rem_logid(dblp, + NULL, fnp->id); + if (recover_fnp != NULL) + break; + continue; + } + fnp->ref = 1; + goto found; + } + ++fnp->ref; + if (ok) + goto found; + found_fnp = fnp; + } + } + if (maxid <= fnp->id) + maxid = fnp->id + 1; + } + if ((fnp = found_fnp) != NULL) + goto found; + + /* Fill in fnp structure. */ + if (recover_fnp != NULL) /* This has the right number */ + fnp = recover_fnp; + else if (reuse_fnp != NULL) /* Reuse existing one. */ + fnp = reuse_fnp; + else { /* Allocate a new one. 
*/ + if ((ret = __db_shalloc(dblp->reginfo.addr, + sizeof(FNAME), 0, &fnp)) != 0) + goto mem_err; + fnp->id = maxid; + } + + if (F_ISSET(dblp, DBLOG_RECOVER)) + fnp->id = dbp->log_fileid; + + fnp->ref = 1; + fnp->locked = 0; + fnp->s_type = dbp->type; + memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN); + fnp->meta_pgno = dbp->meta_pgno; + + if (name != NULL) { + len = strlen(name) + 1; + if ((ret = + __db_shalloc(dblp->reginfo.addr, len, 0, &namep)) != 0) { +mem_err: __db_err(dbenv, + "Unable to allocate memory to register %s", name); + goto err; + } + fnp->name_off = R_OFFSET(&dblp->reginfo, namep); + memcpy(namep, name, len); + } else + fnp->name_off = INVALID_ROFF; + + /* Only do the insert if we allocated a new fnp. */ + if (reuse_fnp == NULL && recover_fnp == NULL) + SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname); + inserted = 1; + + /* Log the registry. */ + if (!F_ISSET(dblp, DBLOG_RECOVER)) { + /* + * We allow logging on in-memory databases, so the name here + * could be NULL. + */ + if (name != NULL) { + r_name.data = (void *)name; + r_name.size = strlen(name) + 1; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = dbp->fileid; + fid_dbt.size = DB_FILE_ID_LEN; + if ((ret = __log_register_log(dbenv, NULL, &r_unused, + 0, LOG_OPEN, name == NULL ? NULL : &r_name, + &fid_dbt, fnp->id, dbp->type, dbp->meta_pgno)) != 0) + goto err; + } + +found: /* + * If we found the entry in the shared area, then the file is + * already open, so there is no need to log the open. We only + * log the open and closes on the first open and last close. 
+ */ + if (!F_ISSET(dblp, DBLOG_RECOVER) && + (ret = __log_add_logid(dbenv, dblp, dbp, fnp->id)) != 0) + goto err; + + if (!F_ISSET(dblp, DBLOG_RECOVER)) + dbp->log_fileid = fnp->id; + + if (0) { +err: if (inserted) + SH_TAILQ_REMOVE(&lp->fq, fnp, q, __fname); + if (namep != NULL) + __db_shalloc_free(dblp->reginfo.addr, namep); + if (fnp != NULL) + __db_shalloc_free(dblp->reginfo.addr, fnp); + } + + R_UNLOCK(dbenv, &dblp->reginfo); + + return (ret); +} + +/* + * log_unregister -- + * Discard a registered file name. + */ +int +log_unregister(dbenv, dbp) + DB_ENV *dbenv; + DB *dbp; +{ + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_unregister(dbenv, dbp)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + ret = __log_filelist_update(dbenv, dbp, dbp->log_fileid, NULL, NULL); + dbp->log_fileid = DB_LOGFILEID_INVALID; + return (ret); +} + +/* + * PUBLIC: int __log_filelist_update + * PUBLIC: __P((DB_ENV *, DB *, int32_t, const char *, int *)); + * + * Utility player for updating and logging the file list. Called + * for 3 reasons: + * 1) mark file closed: newname == NULL. + * 2) change filename: newname != NULL. + * 3) from recovery to verify & change filename if necessary, set != NULL. + */ +int +__log_filelist_update(dbenv, dbp, fid, newname, set) + DB_ENV *dbenv; + DB *dbp; + int32_t fid; + const char *newname; + int *set; +{ + DBT fid_dbt, r_name; + DB_LOG *dblp; + DB_LSN r_unused; + FNAME *fnp; + LOG *lp; + u_int32_t len, newlen; + int ret; + void *namep; + + ret = 0; + dblp = dbenv->lg_handle; + lp = dblp->reginfo.primary; + + R_LOCK(dbenv, &dblp->reginfo); + + /* Find the entry in the log. 
*/ + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) + if (fid == fnp->id) + break; + if (fnp == NULL) { + __db_err(dbenv, "log_unregister: non-existent file id"); + ret = EINVAL; + goto ret1; + } + + /* + * Log the unregistry only if this is the last one and we are + * really closing the file or if this is an abort of a created + * file and we need to make sure there is a record in the log. + */ + namep = NULL; + len = 0; + if (fnp->name_off != INVALID_ROFF) { + namep = R_ADDR(&dblp->reginfo, fnp->name_off); + len = strlen(namep) + 1; + } + if (!F_ISSET(dblp, DBLOG_RECOVER) && fnp->ref == 1) { + if (namep != NULL) { + memset(&r_name, 0, sizeof(r_name)); + r_name.data = namep; + r_name.size = len; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + if ((ret = __log_register_log(dbenv, NULL, &r_unused, + 0, LOG_CLOSE, + fnp->name_off == INVALID_ROFF ? NULL : &r_name, + &fid_dbt, fid, fnp->s_type, fnp->meta_pgno)) + != 0) + goto ret1; + } + + /* + * If we are changing the name we must log this fact. + */ + if (newname != NULL) { + DB_ASSERT(fnp->ref == 1); + newlen = strlen(newname) + 1; + if (!F_ISSET(dblp, DBLOG_RECOVER)) { + r_name.data = (void *) newname; + r_name.size = newlen; + if ((ret = __log_register_log(dbenv, + NULL, &r_unused, 0, LOG_OPEN, &r_name, &fid_dbt, + fnp->id, fnp->s_type, fnp->meta_pgno)) != 0) + goto ret1; + } + + /* + * Check to see if the name is already correct. 
+ */ + if (set != NULL) { + if (len != newlen || memcmp(namep, newname, len) != 0) + *set = 1; + else { + *set = 0; + goto ret1; + } + } + + /* + * Change the name, realloc memory if necessary + */ + if (len < newlen) { + __db_shalloc_free(dblp->reginfo.addr, + R_ADDR(&dblp->reginfo, fnp->name_off)); + if ((ret = __db_shalloc( + dblp->reginfo.addr, newlen, 0, &namep)) != 0) { + __db_err(dbenv, + "Unable to allocate memory to register %s", + newname); + goto ret1; + } + fnp->name_off = R_OFFSET(&dblp->reginfo, namep); + } else + namep = R_ADDR(&dblp->reginfo, fnp->name_off); + memcpy(namep, newname, newlen); + } else { + + /* + * If more than 1 reference, just decrement the reference + * and return. Otherwise, free the name if one exists. + */ + DB_ASSERT(fnp->ref >= 1); + --fnp->ref; + if (fnp->ref == 0) { + if (fnp->name_off != INVALID_ROFF) + __db_shalloc_free(dblp->reginfo.addr, + R_ADDR(&dblp->reginfo, fnp->name_off)); + fnp->name_off = INVALID_ROFF; + } + + /* + * Remove from the process local table. If this + * operation is taking place during recovery, then + * the logid was never added to the table, so do not remove it. + */ + if (!F_ISSET(dblp, DBLOG_RECOVER)) + __log_rem_logid(dblp, dbp, fid); + } + +ret1: R_UNLOCK(dbenv, &dblp->reginfo); + return (ret); +} + +/* + * __log_file_lock -- lock a file for single access + * This only works if logging is on. 
+ * + * PUBLIC: int __log_file_lock __P((DB *)); + */ +int +__log_file_lock(dbp) + DB *dbp; +{ + DB_ENV *dbenv; + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + int ret; + + dbenv = dbp->dbenv; + dblp = dbenv->lg_handle; + lp = dblp->reginfo.primary; + + ret = 0; + R_LOCK(dbenv, &dblp->reginfo); + + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { + if (fnp->ref == 0) + continue; + + if (!memcmp(dbp->fileid, fnp->ufid, DB_FILE_ID_LEN)) { + if (fnp->meta_pgno == 0) { + if (fnp->ref != 1) + goto err; + + fnp->locked = 1; + } else { +err: __db_err(dbp->dbenv, "File is open"); + ret = EINVAL; + goto done; + } + + } + } +done: R_UNLOCK(dbenv, &dblp->reginfo); + return (ret); +} |