diff options
author | tim@threads.polyesthetic.msg <> | 2001-03-04 19:42:05 -0500 |
---|---|---|
committer | tim@threads.polyesthetic.msg <> | 2001-03-04 19:42:05 -0500 |
commit | 89dad52004ecba5a380aeebb0e2a9beaae88eb86 (patch) | |
tree | 9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/log/log_get.c | |
parent | 639a1069d313843288ba6d9cb54b290073a748a7 (diff) | |
download | mariadb-git-89dad52004ecba5a380aeebb0e2a9beaae88eb86.tar.gz |
Import changeset
Diffstat (limited to 'bdb/log/log_get.c')
-rw-r--r-- | bdb/log/log_get.c | 465 |
1 files changed, 465 insertions, 0 deletions
diff --git a/bdb/log/log_get.c b/bdb/log/log_get.c new file mode 100644 index 00000000000..b75d50a62fd --- /dev/null +++ b/bdb/log/log_get.c @@ -0,0 +1,465 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log_get.c,v 11.32 2001/01/11 18:19:53 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#include <unistd.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_page.h" +#include "log.h" +#include "hash.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +/* + * log_get -- + * Get a log record. + */ +int +log_get(dbenv, alsn, dbt, flags) + DB_ENV *dbenv; + DB_LSN *alsn; + DBT *dbt; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LSN saved_lsn; + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_log_get(dbenv, alsn, dbt, flags)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); + + /* Validate arguments. */ + if (flags != DB_CHECKPOINT && flags != DB_CURRENT && + flags != DB_FIRST && flags != DB_LAST && + flags != DB_NEXT && flags != DB_PREV && flags != DB_SET) + return (__db_ferr(dbenv, "log_get", 1)); + + if (F_ISSET(dbenv, DB_ENV_THREAD)) { + if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT) + return (__db_ferr(dbenv, "log_get", 1)); + if (!F_ISSET(dbt, + DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERMEM)) + return (__db_ferr(dbenv, "threaded data", 1)); + } + + dblp = dbenv->lg_handle; + R_LOCK(dbenv, &dblp->reginfo); + + /* + * The alsn field is only initialized if DB_SET is the flag, so this + * assignment causes uninitialized memory complaints for other flag + * values. + */ +#ifdef UMRW + if (flags == DB_SET) + saved_lsn = *alsn; + else + ZERO_LSN(saved_lsn); +#else + saved_lsn = *alsn; +#endif + + /* + * If we get one of the log's header records, repeat the operation. + * This assumes that applications don't ever request the log header + * records by LSN, but that seems reasonable to me. + */ + if ((ret = __log_get(dblp, + alsn, dbt, flags, 0)) == 0 && alsn->offset == 0) { + switch (flags) { + case DB_FIRST: + flags = DB_NEXT; + break; + case DB_LAST: + flags = DB_PREV; + break; + } + if (F_ISSET(dbt, DB_DBT_MALLOC)) { + __os_free(dbt->data, dbt->size); + dbt->data = NULL; + } + ret = __log_get(dblp, alsn, dbt, flags, 0); + } + if (ret != 0) + *alsn = saved_lsn; + + R_UNLOCK(dbenv, &dblp->reginfo); + + return (ret); +} + +/* + * __log_get -- + * Get a log record; internal version. + * + * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); + */ +int +__log_get(dblp, alsn, dbt, flags, silent) + DB_LOG *dblp; + DB_LSN *alsn; + DBT *dbt; + u_int32_t flags; + int silent; +{ + DB_ENV *dbenv; + DB_LSN nlsn; + HDR hdr; + LOG *lp; + const char *fail; + char *np, *tbuf; + int cnt, ret; + logfile_validity status; + size_t len, nr; + u_int32_t offset; + u_int8_t *p; + void *shortp, *readp; + + lp = dblp->reginfo.primary; + fail = np = tbuf = NULL; + dbenv = dblp->dbenv; + + nlsn = dblp->c_lsn; + switch (flags) { + case DB_CHECKPOINT: + nlsn = lp->chkpt_lsn; + if (IS_ZERO_LSN(nlsn)) { + /* No db_err. The caller may expect this. */ + ret = ENOENT; + goto err2; + } + break; + case DB_NEXT: /* Next log record. */ + if (!IS_ZERO_LSN(nlsn)) { + /* Increment the cursor by the cursor record size. */ + nlsn.offset += dblp->c_len; + break; + } + /* FALLTHROUGH */ + case DB_FIRST: /* Find the first log record. */ + /* Find the first log file. */ + if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0) + goto err2; + + /* + * We want any readable version, so either DB_LV_NORMAL + * or DB_LV_OLD_READABLE is acceptable here. If it's + * not one of those two, there is no first log record that + * we can read. + */ + if (status != DB_LV_NORMAL && status != DB_LV_OLD_READABLE) { + ret = DB_NOTFOUND; + goto err2; + } + + /* + * We may have only entered records in the buffer, and not + * yet written a log file. If no log files were found and + * there's anything in the buffer, it belongs to file 1. + */ + if (cnt == 0) + cnt = 1; + + nlsn.file = cnt; + nlsn.offset = 0; + break; + case DB_CURRENT: /* Current log record. */ + break; + case DB_PREV: /* Previous log record. */ + if (!IS_ZERO_LSN(nlsn)) { + /* If at start-of-file, move to the previous file. */ + if (nlsn.offset == 0) { + if (nlsn.file == 1 || + __log_valid(dblp, + nlsn.file - 1, 0, &status) != 0) + return (DB_NOTFOUND); + + if (status != DB_LV_NORMAL && + status != DB_LV_OLD_READABLE) + return (DB_NOTFOUND); + + --nlsn.file; + nlsn.offset = dblp->c_off; + } else + nlsn.offset = dblp->c_off; + break; + } + /* FALLTHROUGH */ + case DB_LAST: /* Last log record. */ + nlsn.file = lp->lsn.file; + nlsn.offset = lp->lsn.offset - lp->len; + break; + case DB_SET: /* Set log record. */ + nlsn = *alsn; + break; + } + + if (0) { /* Move to the next file. */ +next_file: ++nlsn.file; + nlsn.offset = 0; + } + + /* Return 1 if the request is past the end of the log. */ + if (nlsn.file > lp->lsn.file || + (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset)) + return (DB_NOTFOUND); + + /* If we've switched files, discard the current file handle. */ + if (dblp->c_lsn.file != nlsn.file && + F_ISSET(&dblp->c_fh, DB_FH_VALID)) { + (void)__os_closehandle(&dblp->c_fh); + } + + /* If the entire record is in the in-memory buffer, copy it out. */ + if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) { + /* Copy the header. */ + p = dblp->bufp + (nlsn.offset - lp->w_off); + memcpy(&hdr, p, sizeof(HDR)); + + /* Copy the record. */ + len = hdr.len - sizeof(HDR); + if ((ret = __db_retcopy(NULL, dbt, p + sizeof(HDR), + len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + goto cksum; + } + + shortp = NULL; + + /* Acquire a file descriptor. */ + if (!F_ISSET(&dblp->c_fh, DB_FH_VALID)) { + if ((ret = __log_name(dblp, nlsn.file, + &np, &dblp->c_fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { + fail = np; + goto err1; + } + __os_freestr(np); + np = NULL; + } + + /* See if we've already read this */ + if (nlsn.file == dblp->r_file && nlsn.offset > dblp->r_off + && nlsn.offset + sizeof(HDR) < dblp->r_off + dblp->r_size) + goto got_header; + + /* + * Seek to the header offset and read the header. Because the file + * may be pre-allocated, we have to make sure that we're not reading + * past the information in the start of the in-memory buffer. + */ + + readp = &hdr; + offset = nlsn.offset; + if (nlsn.file == lp->lsn.file && offset + sizeof(HDR) > lp->w_off) + nr = lp->w_off - offset; + else if (dblp->readbufp == NULL) + nr = sizeof(HDR); + else { + nr = lp->buffer_size; + readp = dblp->readbufp; + dblp->r_file = nlsn.file; + /* Going backwards. Put the current in the middle. */ + if (flags == DB_PREV || flags == DB_LAST) { + if (offset <= lp->buffer_size/2) + offset = 0; + else + offset = offset - lp->buffer_size/2; + } + if (nlsn.file == lp->lsn.file && offset + nr > lp->lsn.offset) + nr = lp->lsn.offset - offset; + dblp->r_off = offset; + } + + if ((ret = __os_seek(dblp->dbenv, + &dblp->c_fh, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) { + fail = "seek"; + goto err1; + } + if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, readp, nr, &nr)) != 0) { + fail = "read"; + goto err1; + } + if (nr < sizeof(HDR)) { + /* If read returns EOF, try the next file. */ + if (nr == 0) { + if (flags != DB_NEXT || nlsn.file == lp->lsn.file) + goto corrupt; + goto next_file; + } + + if (dblp->readbufp != NULL) + memcpy((u_int8_t *) &hdr, readp, nr); + + /* + * If read returns a short count the rest of the record has + * to be in the in-memory buffer. + */ + if (lp->b_off < sizeof(HDR) - nr) + goto corrupt; + + /* Get the rest of the header from the in-memory buffer. */ + memcpy((u_int8_t *)&hdr + nr, dblp->bufp, sizeof(HDR) - nr); + + if (hdr.len == 0) + goto next_file; + + shortp = dblp->bufp + (sizeof(HDR) - nr); + } + + else if (dblp->readbufp != NULL) { + dblp->r_size = nr; +got_header: memcpy((u_int8_t *)&hdr, + dblp->readbufp + (nlsn.offset - dblp->r_off), sizeof(HDR)); + } + + /* + * Check for buffers of 0's, that's what we usually see during recovery, + * although it's certainly not something on which we can depend. Check + * for impossibly large records. The malloc should fail later, but we + * have customers that run mallocs that handle allocation failure as a + * fatal error. + */ + if (hdr.len == 0) + goto next_file; + if (hdr.len <= sizeof(HDR) || hdr.len > lp->persist.lg_max) + goto corrupt; + len = hdr.len - sizeof(HDR); + + /* If we've already moved to the in-memory buffer, fill from there. */ + if (shortp != NULL) { + if (lp->b_off < ((u_int8_t *)shortp - dblp->bufp) + len) + goto corrupt; + if ((ret = __db_retcopy(NULL, dbt, shortp, len, + &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + goto cksum; + } + + if (dblp->readbufp != NULL) { + if (nlsn.offset + hdr.len < dblp->r_off + dblp->r_size) { + if ((ret = __db_retcopy(NULL, dbt, dblp->readbufp + + (nlsn.offset - dblp->r_off) + sizeof(HDR), + len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + goto cksum; + } else if ((ret = __os_seek(dblp->dbenv, &dblp->c_fh, 0, + 0, nlsn.offset + sizeof(HDR), 0, DB_OS_SEEK_SET)) != 0) { + fail = "seek"; + goto err1; + } + } + + /* + * Allocate temporary memory to hold the record. + * + * XXX + * We're calling malloc(3) with a region locked. This isn't + * a good idea. + */ + if ((ret = __os_malloc(dbenv, len, NULL, &tbuf)) != 0) + goto err1; + + /* + * Read the record into the buffer. If read returns a short count, + * there was an error or the rest of the record is in the in-memory + * buffer. Note, the information may be garbage if we're in recovery, + * so don't read past the end of the buffer's memory. + * + * Because the file may be pre-allocated, we have to make sure that + * we're not reading past the information in the start of the in-memory + * buffer. + */ + if (nlsn.file == lp->lsn.file && + nlsn.offset + sizeof(HDR) + len > lp->w_off) + nr = lp->w_off - (nlsn.offset + sizeof(HDR)); + else + nr = len; + if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, tbuf, nr, &nr)) != 0) { + fail = "read"; + goto err1; + } + if (len - nr > lp->buffer_size) + goto corrupt; + if (nr != len) { + if (lp->b_off < len - nr) + goto corrupt; + + /* Get the rest of the record from the in-memory buffer. */ + memcpy((u_int8_t *)tbuf + nr, dblp->bufp, len - nr); + } + + /* Copy the record into the user's DBT. */ + if ((ret = __db_retcopy(NULL, dbt, tbuf, len, + &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) + goto err2; + __os_free(tbuf, 0); + tbuf = NULL; + +cksum: /* + * If the user specified a partial record read, the checksum can't + * match. It's not an obvious thing to do, but a user testing for + * the length of a record might do it. + */ + if (!F_ISSET(dbt, DB_DBT_PARTIAL) && + hdr.cksum != __ham_func4(NULL, dbt->data, dbt->size)) { + if (!silent) + __db_err(dbenv, "log_get: checksum mismatch"); + goto corrupt; + } + + /* Update the cursor and the return lsn. */ + dblp->c_off = hdr.prev; + dblp->c_len = hdr.len; + dblp->c_lsn = nlsn; + *alsn = nlsn; + + return (0); + +corrupt:/* + * This is the catchall -- for some reason we didn't find enough + * information or it wasn't reasonable information, and it wasn't + * because a system call failed. + */ + ret = EIO; + fail = "read"; + +err1: if (!silent) { + if (fail == NULL) + __db_err(dbenv, "log_get: %s", db_strerror(ret)); + else + __db_err(dbenv, + "log_get: %s: %s", fail, db_strerror(ret)); + } + +err2: if (np != NULL) + __os_freestr(np); + if (tbuf != NULL) + __os_free(tbuf, 0); + return (ret); +} |