diff options
Diffstat (limited to 'bdb/log/log_put.c')
-rw-r--r-- | bdb/log/log_put.c | 701 |
1 files changed, 701 insertions, 0 deletions
diff --git a/bdb/log/log_put.c b/bdb/log/log_put.c new file mode 100644 index 00000000000..e5cdedb5493 --- /dev/null +++ b/bdb/log/log_put.c @@ -0,0 +1,701 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_put.c,v 11.26 2000/11/30 00:58:40 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#if TIME_WITH_SYS_TIME +#include <sys/time.h> +#include <time.h> +#else +#if HAVE_SYS_TIME_H +#include <sys/time.h> +#else +#include <time.h> +#endif +#endif + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_page.h" +#include "log.h" +#include "hash.h" +#include "clib_ext.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t)); +static int __log_flush __P((DB_LOG *, const DB_LSN *)); +static int __log_newfh __P((DB_LOG *)); +static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); +static int __log_open_files __P((DB_ENV *)); +static int __log_write __P((DB_LOG *, void *, u_int32_t)); + +/* + * log_put -- + * Write a log record. + */ +int +log_put(dbenv, lsn, dbt, flags) + DB_ENV *dbenv; + DB_LSN *lsn; + const DBT *dbt; + u_int32_t flags; +{ + DB_LOG *dblp; + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_put(dbenv, lsn, dbt, flags)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + /* Validate arguments. */ + if (flags != 0 && flags != DB_CHECKPOINT && + flags != DB_CURLSN && flags != DB_FLUSH) + return (__db_ferr(dbenv, "log_put", 0)); + + dblp = dbenv->lg_handle; + R_LOCK(dbenv, &dblp->reginfo); + ret = __log_put(dbenv, lsn, dbt, flags); + R_UNLOCK(dbenv, &dblp->reginfo); + return (ret); +} + +/* + * __log_put -- + * Write a log record; internal version. + * + * PUBLIC: int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); + */ +int +__log_put(dbenv, lsn, dbt, flags) + DB_ENV *dbenv; + DB_LSN *lsn; + const DBT *dbt; + u_int32_t flags; +{ + DBT t; + DB_LOG *dblp; + LOG *lp; + u_int32_t lastoff; + int ret; + + dblp = dbenv->lg_handle; + lp = dblp->reginfo.primary; + + /* + * If the application just wants to know where we are, fill in + * the information. Currently used by the transaction manager + * to avoid writing TXN_begin records. + */ + if (flags == DB_CURLSN) { + lsn->file = lp->lsn.file; + lsn->offset = lp->lsn.offset; + return (0); + } + + /* If this information won't fit in the file, swap files. */ + if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) { + if (sizeof(HDR) + + sizeof(LOGP) + dbt->size > lp->persist.lg_max) { + __db_err(dbenv, + "log_put: record larger than maximum file size"); + return (EINVAL); + } + + /* Flush the log. */ + if ((ret = __log_flush(dblp, NULL)) != 0) + return (ret); + + /* + * Save the last known offset from the previous file, we'll + * need it to initialize the persistent header information. + */ + lastoff = lp->lsn.offset; + + /* Point the current LSN to the new file. */ + ++lp->lsn.file; + lp->lsn.offset = 0; + + /* Reset the file write offset. */ + lp->w_off = 0; + } else + lastoff = 0; + + /* Initialize the LSN information returned to the user. */ + lsn->file = lp->lsn.file; + lsn->offset = lp->lsn.offset; + + /* + * Insert persistent information as the first record in every file. + * Note that the previous length is wrong for the very first record + * of the log, but that's okay, we check for it during retrieval. + */ + if (lp->lsn.offset == 0) { + t.data = &lp->persist; + t.size = sizeof(LOGP); + if ((ret = __log_putr(dblp, lsn, + &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0) + return (ret); + + /* + * Record files open in this log. + * If we are recovering then we are in the + * process of outputting the files, don't do + * it again. + */ + if (!F_ISSET(dblp, DBLOG_RECOVER) && + (ret = __log_open_files(dbenv)) != 0) + return (ret); + + /* Update the LSN information returned to the user. */ + lsn->file = lp->lsn.file; + lsn->offset = lp->lsn.offset; + } + + /* Write the application's log record. */ + if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0) + return (ret); + + /* + * On a checkpoint, we: + * Put out the checkpoint record (above). + * Save the LSN of the checkpoint in the shared region. + * Append the set of file name information into the log. + */ + if (flags == DB_CHECKPOINT) { + lp->chkpt_lsn = *lsn; + if ((ret = __log_open_files(dbenv)) != 0) + return (ret); + } + + /* + * On a checkpoint or when flush is requested, we: + * Flush the current buffer contents to disk. + * Sync the log to disk. + */ + if (flags == DB_FLUSH || flags == DB_CHECKPOINT) + if ((ret = __log_flush(dblp, NULL)) != 0) + return (ret); + + /* + * On a checkpoint, we: + * Save the time the checkpoint was written. + * Reset the bytes written since the last checkpoint. + */ + if (flags == DB_CHECKPOINT) { + (void)time(&lp->chkpt); + lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; + } + return (0); +} + +/* + * __log_putr -- + * Actually put a record into the log. + */ +static int +__log_putr(dblp, lsn, dbt, prev) + DB_LOG *dblp; + DB_LSN *lsn; + const DBT *dbt; + u_int32_t prev; +{ + HDR hdr; + LOG *lp; + int ret; + + lp = dblp->reginfo.primary; + + /* + * Initialize the header. If we just switched files, lsn.offset will + * be 0, and what we really want is the offset of the previous record + * in the previous file. Fortunately, prev holds the value we want. + */ + hdr.prev = prev; + hdr.len = sizeof(HDR) + dbt->size; + hdr.cksum = __ham_func4(NULL, dbt->data, dbt->size); + + if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0) + return (ret); + lp->len = sizeof(HDR); + lp->lsn.offset += sizeof(HDR); + + if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0) + return (ret); + lp->len += dbt->size; + lp->lsn.offset += dbt->size; + return (0); +} + +/* + * log_flush -- + * Write all records less than or equal to the specified LSN. + */ +int +log_flush(dbenv, lsn) + DB_ENV *dbenv; + const DB_LSN *lsn; +{ + DB_LOG *dblp; + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_flush(dbenv, lsn)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + dblp = dbenv->lg_handle; + R_LOCK(dbenv, &dblp->reginfo); + ret = __log_flush(dblp, lsn); + R_UNLOCK(dbenv, &dblp->reginfo); + return (ret); +} + +/* + * __log_flush -- + * Write all records less than or equal to the specified LSN; internal + * version. + */ +static int +__log_flush(dblp, lsn) + DB_LOG *dblp; + const DB_LSN *lsn; +{ + DB_LSN t_lsn; + LOG *lp; + int current, ret; + + ret = 0; + lp = dblp->reginfo.primary; + + /* + * If no LSN specified, flush the entire log by setting the flush LSN + * to the last LSN written in the log. Otherwise, check that the LSN + * isn't a non-existent record for the log. + */ + if (lsn == NULL) { + t_lsn.file = lp->lsn.file; + t_lsn.offset = lp->lsn.offset - lp->len; + lsn = &t_lsn; + } else + if (lsn->file > lp->lsn.file || + (lsn->file == lp->lsn.file && + lsn->offset > lp->lsn.offset - lp->len)) { + __db_err(dblp->dbenv, + "log_flush: LSN past current end-of-log"); + return (EINVAL); + } + + /* + * If the LSN is less than or equal to the last-sync'd LSN, we're done. + * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte + * after the byte we absolutely know was written to disk, so the test + * is <, not <=. + */ + if (lsn->file < lp->s_lsn.file || + (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset)) + return (0); + + /* + * We may need to write the current buffer. We have to write the + * current buffer if the flush LSN is greater than or equal to the + * buffer's starting LSN. + */ + current = 0; + if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) { + if ((ret = __log_write(dblp, dblp->bufp, lp->b_off)) != 0) + return (ret); + + lp->b_off = 0; + current = 1; + } + + /* + * It's possible that this thread may never have written to this log + * file. Acquire a file descriptor if we don't already have one. + * One last check -- if we're not writing anything from the current + * buffer, don't bother. We have nothing to write and nothing to + * sync. + */ + if (dblp->lfname != lp->lsn.file) { + if (!current) + return (0); + if ((ret = __log_newfh(dblp)) != 0) + return (ret); + } + + /* Sync all writes to disk. */ + if ((ret = __os_fsync(dblp->dbenv, &dblp->lfh)) != 0) + return (__db_panic(dblp->dbenv, ret)); + ++lp->stat.st_scount; + + /* Set the last-synced LSN, using the on-disk write offset. */ + lp->s_lsn.file = lp->f_lsn.file; + lp->s_lsn.offset = lp->w_off; + + return (0); +} + +/* + * __log_fill -- + * Write information into the log. + */ +static int +__log_fill(dblp, lsn, addr, len) + DB_LOG *dblp; + DB_LSN *lsn; + void *addr; + u_int32_t len; +{ + LOG *lp; + u_int32_t bsize, nrec; + size_t nw, remain; + int ret; + + lp = dblp->reginfo.primary; + bsize = lp->buffer_size; + + while (len > 0) { /* Copy out the data. */ + /* + * If we're beginning a new buffer, note the user LSN to which + * the first byte of the buffer belongs. We have to know this + * when flushing the buffer so that we know if the in-memory + * buffer needs to be flushed. + */ + if (lp->b_off == 0) + lp->f_lsn = *lsn; + + /* + * If we're on a buffer boundary and the data is big enough, + * copy as many records as we can directly from the data. + */ + if (lp->b_off == 0 && len >= bsize) { + nrec = len / bsize; + if ((ret = __log_write(dblp, addr, nrec * bsize)) != 0) + return (ret); + addr = (u_int8_t *)addr + nrec * bsize; + len -= nrec * bsize; + ++lp->stat.st_wcount_fill; + continue; + } + + /* Figure out how many bytes we can copy this time. */ + remain = bsize - lp->b_off; + nw = remain > len ? len : remain; + memcpy(dblp->bufp + lp->b_off, addr, nw); + addr = (u_int8_t *)addr + nw; + len -= nw; + lp->b_off += nw; + + /* If we fill the buffer, flush it. */ + if (lp->b_off == bsize) { + if ((ret = __log_write(dblp, dblp->bufp, bsize)) != 0) + return (ret); + lp->b_off = 0; + ++lp->stat.st_wcount_fill; + } + } + return (0); +} + +/* + * __log_write -- + * Write the log buffer to disk. + */ +static int +__log_write(dblp, addr, len) + DB_LOG *dblp; + void *addr; + u_int32_t len; +{ + LOG *lp; + size_t nw; + int ret; + + /* + * If we haven't opened the log file yet or the current one + * has changed, acquire a new log file. + */ + lp = dblp->reginfo.primary; + if (!F_ISSET(&dblp->lfh, DB_FH_VALID) || dblp->lfname != lp->lsn.file) + if ((ret = __log_newfh(dblp)) != 0) + return (ret); + + /* + * Seek to the offset in the file (someone may have written it + * since we last did). + */ + if ((ret = + __os_seek(dblp->dbenv, + &dblp->lfh, 0, 0, lp->w_off, 0, DB_OS_SEEK_SET)) != 0 || + (ret = __os_write(dblp->dbenv, &dblp->lfh, addr, len, &nw)) != 0) + return (__db_panic(dblp->dbenv, ret)); + if (nw != len) { + __db_err(dblp->dbenv, "Short write while writing log"); + return (EIO); + } + + /* Reset the buffer offset and update the seek offset. */ + lp->w_off += len; + + /* Update written statistics. */ + if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { + lp->stat.st_w_bytes -= MEGABYTE; + ++lp->stat.st_w_mbytes; + } + if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) { + lp->stat.st_wc_bytes -= MEGABYTE; + ++lp->stat.st_wc_mbytes; + } + ++lp->stat.st_wcount; + + return (0); +} + +/* + * log_file -- + * Map a DB_LSN to a file name. + */ +int +log_file(dbenv, lsn, namep, len) + DB_ENV *dbenv; + const DB_LSN *lsn; + char *namep; + size_t len; +{ + DB_LOG *dblp; + int ret; + char *name; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_file(dbenv, lsn, namep, len)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + dblp = dbenv->lg_handle; + R_LOCK(dbenv, &dblp->reginfo); + ret = __log_name(dblp, lsn->file, &name, NULL, 0); + R_UNLOCK(dbenv, &dblp->reginfo); + if (ret != 0) + return (ret); + + /* Check to make sure there's enough room and copy the name. */ + if (len < strlen(name) + 1) { + *namep = '\0'; + __db_err(dbenv, "log_file: name buffer is too short"); + return (EINVAL); + } + (void)strcpy(namep, name); + __os_freestr(name); + + return (0); +} + +/* + * __log_newfh -- + * Acquire a file handle for the current log file. + */ +static int +__log_newfh(dblp) + DB_LOG *dblp; +{ + LOG *lp; + int ret; + char *name; + + /* Close any previous file descriptor. */ + if (F_ISSET(&dblp->lfh, DB_FH_VALID)) + (void)__os_closehandle(&dblp->lfh); + + /* Get the path of the new file and open it. */ + lp = dblp->reginfo.primary; + dblp->lfname = lp->lsn.file; + + /* + * Adding DB_OSO_LOG to the flags may add additional platform-specific + * optimizations. On WinNT, the logfile is preallocated, which may + * have a time penalty at startup, but have better overall throughput. + * We are not certain that this works reliably, so enable at your own + * risk. + * + * XXX: + * Initialize the log file size. This is a hack to push the log's + * maximum size down into the Windows __os_open routine, because it + * wants to pre-allocate it. + */ + dblp->lfh.log_size = dblp->dbenv->lg_max; + if ((ret = __log_name(dblp, dblp->lfname, + &name, &dblp->lfh, + DB_OSO_CREATE |/* DB_OSO_LOG |*/ DB_OSO_SEQ)) != 0) + __db_err(dblp->dbenv, + "log_put: %s: %s", name, db_strerror(ret)); + + __os_freestr(name); + return (ret); +} + +/* + * __log_name -- + * Return the log name for a particular file, and optionally open it. + * + * PUBLIC: int __log_name __P((DB_LOG *, + * PUBLIC: u_int32_t, char **, DB_FH *, u_int32_t)); + */ +int +__log_name(dblp, filenumber, namep, fhp, flags) + DB_LOG *dblp; + u_int32_t filenumber, flags; + char **namep; + DB_FH *fhp; +{ + LOG *lp; + int ret; + char *oname; + char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20]; + + lp = dblp->reginfo.primary; + + /* + * !!! + * The semantics of this routine are bizarre. + * + * The reason for all of this is that we need a place where we can + * intercept requests for log files, and, if appropriate, check for + * both the old-style and new-style log file names. The trick is + * that all callers of this routine that are opening the log file + * read-only want to use an old-style file name if they can't find + * a match using a new-style name. The only down-side is that some + * callers may check for the old-style when they really don't need + * to, but that shouldn't mess up anything, and we only check for + * the old-style name when we've already failed to find a new-style + * one. + * + * Create a new-style file name, and if we're not going to open the + * file, return regardless. + */ + (void)snprintf(new, sizeof(new), LFNAME, filenumber); + if ((ret = __db_appname(dblp->dbenv, + DB_APP_LOG, NULL, new, 0, NULL, namep)) != 0 || fhp == NULL) + return (ret); + + /* Open the new-style file -- if we succeed, we're done. */ + if ((ret = __os_open(dblp->dbenv, + *namep, flags, lp->persist.mode, fhp)) == 0) + return (0); + + /* + * The open failed... if the DB_RDONLY flag isn't set, we're done, + * the caller isn't interested in old-style files. + */ + if (!LF_ISSET(DB_OSO_RDONLY)) { + __db_err(dblp->dbenv, + "%s: log file open failed: %s", *namep, db_strerror(ret)); + return (__db_panic(dblp->dbenv, ret)); + } + + /* Create an old-style file name. */ + (void)snprintf(old, sizeof(old), LFNAME_V1, filenumber); + if ((ret = __db_appname(dblp->dbenv, + DB_APP_LOG, NULL, old, 0, NULL, &oname)) != 0) + goto err; + + /* + * Open the old-style file -- if we succeed, we're done. Free the + * space allocated for the new-style name and return the old-style + * name to the caller. + */ + if ((ret = __os_open(dblp->dbenv, + oname, flags, lp->persist.mode, fhp)) == 0) { + __os_freestr(*namep); + *namep = oname; + return (0); + } + + /* + * Couldn't find either style of name -- return the new-style name + * for the caller's error message. If it's an old-style name that's + * actually missing we're going to confuse the user with the error + * message, but that implies that not only were we looking for an + * old-style name, but we expected it to exist and we weren't just + * looking for any log file. That's not a likely error. + */ +err: __os_freestr(oname); + return (ret); +} + +static int +__log_open_files(dbenv) + DB_ENV *dbenv; +{ + DB_LOG *dblp; + DB_LSN r_unused; + DBT fid_dbt, t; + FNAME *fnp; + LOG *lp; + int ret; + + dblp = dbenv->lg_handle; + lp = dblp->reginfo.primary; + + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); + fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { + if (fnp->ref == 0) /* Entry not in use. */ + continue; + if (fnp->name_off != INVALID_ROFF) { + memset(&t, 0, sizeof(t)); + t.data = R_ADDR(&dblp->reginfo, fnp->name_off); + t.size = strlen(t.data) + 1; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + /* + * Output LOG_CHECKPOINT records which will be + * processed during the OPENFILES pass of recovery. + * At the end of recovery we want to output the + * files that were open so that a future recovery + * run will have the correct files open during + * a backward pass. For this we output LOG_CLOSE + * records so that the files will be closed on + * the forward pass. + */ + if ((ret = __log_register_log(dbenv, + NULL, &r_unused, 0, + F_ISSET(dblp, DBLOG_RECOVER) ? LOG_CLOSE : LOG_CHECKPOINT, + fnp->name_off == INVALID_ROFF ? NULL : &t, + &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno)) != 0) + return (ret); + } + return (0); +} |