/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1996, 1997, 1998, 1999, 2000 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint static const char revid[] = "$Id: log_get.c,v 11.32 2001/01/11 18:19:53 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include #include #include #endif #ifdef HAVE_RPC #include "db_server.h" #endif #include "db_int.h" #include "db_page.h" #include "log.h" #include "hash.h" #ifdef HAVE_RPC #include "gen_client_ext.h" #include "rpc_client_ext.h" #endif /* * log_get -- * Get a log record. */ int log_get(dbenv, alsn, dbt, flags) DB_ENV *dbenv; DB_LSN *alsn; DBT *dbt; u_int32_t flags; { DB_LOG *dblp; DB_LSN saved_lsn; int ret; #ifdef HAVE_RPC if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) return (__dbcl_log_get(dbenv, alsn, dbt, flags)); #endif PANIC_CHECK(dbenv); ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, DB_INIT_LOG); /* Validate arguments. */ if (flags != DB_CHECKPOINT && flags != DB_CURRENT && flags != DB_FIRST && flags != DB_LAST && flags != DB_NEXT && flags != DB_PREV && flags != DB_SET) return (__db_ferr(dbenv, "log_get", 1)); if (F_ISSET(dbenv, DB_ENV_THREAD)) { if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT) return (__db_ferr(dbenv, "log_get", 1)); if (!F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERMEM)) return (__db_ferr(dbenv, "threaded data", 1)); } dblp = dbenv->lg_handle; R_LOCK(dbenv, &dblp->reginfo); /* * The alsn field is only initialized if DB_SET is the flag, so this * assignment causes uninitialized memory complaints for other flag * values. */ #ifdef UMRW if (flags == DB_SET) saved_lsn = *alsn; else ZERO_LSN(saved_lsn); #else saved_lsn = *alsn; #endif /* * If we get one of the log's header records, repeat the operation. * This assumes that applications don't ever request the log header * records by LSN, but that seems reasonable to me. */ if ((ret = __log_get(dblp, alsn, dbt, flags, 0)) == 0 && alsn->offset == 0) { switch (flags) { case DB_FIRST: flags = DB_NEXT; break; case DB_LAST: flags = DB_PREV; break; } if (F_ISSET(dbt, DB_DBT_MALLOC)) { __os_free(dbt->data, dbt->size); dbt->data = NULL; } ret = __log_get(dblp, alsn, dbt, flags, 0); } if (ret != 0) *alsn = saved_lsn; R_UNLOCK(dbenv, &dblp->reginfo); return (ret); } /* * __log_get -- * Get a log record; internal version. * * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); */ int __log_get(dblp, alsn, dbt, flags, silent) DB_LOG *dblp; DB_LSN *alsn; DBT *dbt; u_int32_t flags; int silent; { DB_ENV *dbenv; DB_LSN nlsn; HDR hdr; LOG *lp; const char *fail; char *np, *tbuf; int cnt, ret; logfile_validity status; size_t len, nr; u_int32_t offset; u_int8_t *p; void *shortp, *readp; lp = dblp->reginfo.primary; fail = np = tbuf = NULL; dbenv = dblp->dbenv; nlsn = dblp->c_lsn; switch (flags) { case DB_CHECKPOINT: nlsn = lp->chkpt_lsn; if (IS_ZERO_LSN(nlsn)) { /* No db_err. The caller may expect this. */ ret = ENOENT; goto err2; } break; case DB_NEXT: /* Next log record. */ if (!IS_ZERO_LSN(nlsn)) { /* Increment the cursor by the cursor record size. */ nlsn.offset += dblp->c_len; break; } /* FALLTHROUGH */ case DB_FIRST: /* Find the first log record. */ /* Find the first log file. */ if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0) goto err2; /* * We want any readable version, so either DB_LV_NORMAL * or DB_LV_OLD_READABLE is acceptable here. If it's * not one of those two, there is no first log record that * we can read. */ if (status != DB_LV_NORMAL && status != DB_LV_OLD_READABLE) { ret = DB_NOTFOUND; goto err2; } /* * We may have only entered records in the buffer, and not * yet written a log file. If no log files were found and * there's anything in the buffer, it belongs to file 1. */ if (cnt == 0) cnt = 1; nlsn.file = cnt; nlsn.offset = 0; break; case DB_CURRENT: /* Current log record. */ break; case DB_PREV: /* Previous log record. */ if (!IS_ZERO_LSN(nlsn)) { /* If at start-of-file, move to the previous file. */ if (nlsn.offset == 0) { if (nlsn.file == 1 || __log_valid(dblp, nlsn.file - 1, 0, &status) != 0) return (DB_NOTFOUND); if (status != DB_LV_NORMAL && status != DB_LV_OLD_READABLE) return (DB_NOTFOUND); --nlsn.file; nlsn.offset = dblp->c_off; } else nlsn.offset = dblp->c_off; break; } /* FALLTHROUGH */ case DB_LAST: /* Last log record. */ nlsn.file = lp->lsn.file; nlsn.offset = lp->lsn.offset - lp->len; break; case DB_SET: /* Set log record. */ nlsn = *alsn; break; } if (0) { /* Move to the next file. */ next_file: ++nlsn.file; nlsn.offset = 0; } /* Return 1 if the request is past the end of the log. */ if (nlsn.file > lp->lsn.file || (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset)) return (DB_NOTFOUND); /* If we've switched files, discard the current file handle. */ if (dblp->c_lsn.file != nlsn.file && F_ISSET(&dblp->c_fh, DB_FH_VALID)) { (void)__os_closehandle(&dblp->c_fh); } /* If the entire record is in the in-memory buffer, copy it out. */ if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) { /* Copy the header. */ p = dblp->bufp + (nlsn.offset - lp->w_off); memcpy(&hdr, p, sizeof(HDR)); /* Copy the record. */ len = hdr.len - sizeof(HDR); if ((ret = __db_retcopy(NULL, dbt, p + sizeof(HDR), len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) goto err2; goto cksum; } shortp = NULL; /* Acquire a file descriptor. */ if (!F_ISSET(&dblp->c_fh, DB_FH_VALID)) { if ((ret = __log_name(dblp, nlsn.file, &np, &dblp->c_fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { fail = np; goto err1; } __os_freestr(np); np = NULL; } /* See if we've already read this */ if (nlsn.file == dblp->r_file && nlsn.offset > dblp->r_off && nlsn.offset + sizeof(HDR) < dblp->r_off + dblp->r_size) goto got_header; /* * Seek to the header offset and read the header. Because the file * may be pre-allocated, we have to make sure that we're not reading * past the information in the start of the in-memory buffer. */ readp = &hdr; offset = nlsn.offset; if (nlsn.file == lp->lsn.file && offset + sizeof(HDR) > lp->w_off) nr = lp->w_off - offset; else if (dblp->readbufp == NULL) nr = sizeof(HDR); else { nr = lp->buffer_size; readp = dblp->readbufp; dblp->r_file = nlsn.file; /* Going backwards. Put the current in the middle. */ if (flags == DB_PREV || flags == DB_LAST) { if (offset <= lp->buffer_size/2) offset = 0; else offset = offset - lp->buffer_size/2; } if (nlsn.file == lp->lsn.file && offset + nr > lp->lsn.offset) nr = lp->lsn.offset - offset; dblp->r_off = offset; } if ((ret = __os_seek(dblp->dbenv, &dblp->c_fh, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) { fail = "seek"; goto err1; } if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, readp, nr, &nr)) != 0) { fail = "read"; goto err1; } if (nr < sizeof(HDR)) { /* If read returns EOF, try the next file. */ if (nr == 0) { if (flags != DB_NEXT || nlsn.file == lp->lsn.file) goto corrupt; goto next_file; } if (dblp->readbufp != NULL) memcpy((u_int8_t *) &hdr, readp, nr); /* * If read returns a short count the rest of the record has * to be in the in-memory buffer. */ if (lp->b_off < sizeof(HDR) - nr) goto corrupt; /* Get the rest of the header from the in-memory buffer. */ memcpy((u_int8_t *)&hdr + nr, dblp->bufp, sizeof(HDR) - nr); if (hdr.len == 0) goto next_file; shortp = dblp->bufp + (sizeof(HDR) - nr); } else if (dblp->readbufp != NULL) { dblp->r_size = nr; got_header: memcpy((u_int8_t *)&hdr, dblp->readbufp + (nlsn.offset - dblp->r_off), sizeof(HDR)); } /* * Check for buffers of 0's, that's what we usually see during recovery, * although it's certainly not something on which we can depend. Check * for impossibly large records. The malloc should fail later, but we * have customers that run mallocs that handle allocation failure as a * fatal error. */ if (hdr.len == 0) goto next_file; if (hdr.len <= sizeof(HDR) || hdr.len > lp->persist.lg_max) goto corrupt; len = hdr.len - sizeof(HDR); /* If we've already moved to the in-memory buffer, fill from there. */ if (shortp != NULL) { if (lp->b_off < ((u_int8_t *)shortp - dblp->bufp) + len) goto corrupt; if ((ret = __db_retcopy(NULL, dbt, shortp, len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) goto err2; goto cksum; } if (dblp->readbufp != NULL) { if (nlsn.offset + hdr.len < dblp->r_off + dblp->r_size) { if ((ret = __db_retcopy(NULL, dbt, dblp->readbufp + (nlsn.offset - dblp->r_off) + sizeof(HDR), len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) goto err2; goto cksum; } else if ((ret = __os_seek(dblp->dbenv, &dblp->c_fh, 0, 0, nlsn.offset + sizeof(HDR), 0, DB_OS_SEEK_SET)) != 0) { fail = "seek"; goto err1; } } /* * Allocate temporary memory to hold the record. * * XXX * We're calling malloc(3) with a region locked. This isn't * a good idea. */ if ((ret = __os_malloc(dbenv, len, NULL, &tbuf)) != 0) goto err1; /* * Read the record into the buffer. If read returns a short count, * there was an error or the rest of the record is in the in-memory * buffer. Note, the information may be garbage if we're in recovery, * so don't read past the end of the buffer's memory. * * Because the file may be pre-allocated, we have to make sure that * we're not reading past the information in the start of the in-memory * buffer. */ if (nlsn.file == lp->lsn.file && nlsn.offset + sizeof(HDR) + len > lp->w_off) nr = lp->w_off - (nlsn.offset + sizeof(HDR)); else nr = len; if ((ret = __os_read(dblp->dbenv, &dblp->c_fh, tbuf, nr, &nr)) != 0) { fail = "read"; goto err1; } if (len - nr > lp->buffer_size) goto corrupt; if (nr != len) { if (lp->b_off < len - nr) goto corrupt; /* Get the rest of the record from the in-memory buffer. */ memcpy((u_int8_t *)tbuf + nr, dblp->bufp, len - nr); } /* Copy the record into the user's DBT. */ if ((ret = __db_retcopy(NULL, dbt, tbuf, len, &dblp->c_dbt.data, &dblp->c_dbt.ulen)) != 0) goto err2; __os_free(tbuf, 0); tbuf = NULL; cksum: /* * If the user specified a partial record read, the checksum can't * match. It's not an obvious thing to do, but a user testing for * the length of a record might do it. */ if (!F_ISSET(dbt, DB_DBT_PARTIAL) && hdr.cksum != __ham_func4(NULL, dbt->data, dbt->size)) { if (!silent) __db_err(dbenv, "log_get: checksum mismatch"); goto corrupt; } /* Update the cursor and the return lsn. */ dblp->c_off = hdr.prev; dblp->c_len = hdr.len; dblp->c_lsn = nlsn; *alsn = nlsn; return (0); corrupt:/* * This is the catchall -- for some reason we didn't find enough * information or it wasn't reasonable information, and it wasn't * because a system call failed. */ ret = EIO; fail = "read"; err1: if (!silent) { if (fail == NULL) __db_err(dbenv, "log_get: %s", db_strerror(ret)); else __db_err(dbenv, "log_get: %s: %s", fail, db_strerror(ret)); } err2: if (np != NULL) __os_freestr(np); if (tbuf != NULL) __os_free(tbuf, 0); return (ret); }