diff options
Diffstat (limited to 'bdb/db/db_am.c')
-rw-r--r-- | bdb/db/db_am.c | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/bdb/db/db_am.c b/bdb/db/db_am.c new file mode 100644 index 00000000000..2d224566904 --- /dev/null +++ b/bdb/db/db_am.c @@ -0,0 +1,511 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: db_am.c,v 11.42 2001/01/11 18:19:50 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "btree.h" +#include "hash.h" +#include "qam.h" +#include "lock.h" +#include "mp.h" +#include "txn.h" +#include "db_am.h" +#include "db_ext.h" + +/* + * __db_cursor -- + * Allocate and return a cursor. + * + * PUBLIC: int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t)); + */ +int +__db_cursor(dbp, txn, dbcp, flags) + DB *dbp; + DB_TXN *txn; + DBC **dbcp; + u_int32_t flags; +{ + DB_ENV *dbenv; + DBC *dbc; + db_lockmode_t mode; + u_int32_t op; + int ret; + + dbenv = dbp->dbenv; + + PANIC_CHECK(dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor"); + + /* Check for invalid flags. */ + if ((ret = __db_cursorchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) + return (ret); + + if ((ret = + __db_icursor(dbp, txn, dbp->type, PGNO_INVALID, 0, dbcp)) != 0) + return (ret); + dbc = *dbcp; + + /* + * If this is CDB, do all the locking in the interface, which is + * right here. + */ + if (CDB_LOCKING(dbenv)) { + op = LF_ISSET(DB_OPFLAGS_MASK); + mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE : + ((op == DB_WRITECURSOR) ? DB_LOCK_IWRITE : DB_LOCK_READ); + if ((ret = lock_get(dbenv, dbc->locker, 0, + &dbc->lock_dbt, mode, &dbc->mylock)) != 0) { + (void)__db_c_close(dbc); + return (ret); + } + if (op == DB_WRITECURSOR) + F_SET(dbc, DBC_WRITECURSOR); + if (op == DB_WRITELOCK) + F_SET(dbc, DBC_WRITER); + } + + return (0); +} + +/* + * __db_icursor -- + * Internal version of __db_cursor. If dbcp is + * non-NULL it is assumed to point to an area to + * initialize as a cursor. + * + * PUBLIC: int __db_icursor + * PUBLIC: __P((DB *, DB_TXN *, DBTYPE, db_pgno_t, int, DBC **)); + */ +int +__db_icursor(dbp, txn, dbtype, root, is_opd, dbcp) + DB *dbp; + DB_TXN *txn; + DBTYPE dbtype; + db_pgno_t root; + int is_opd; + DBC **dbcp; +{ + DBC *dbc, *adbc; + DBC_INTERNAL *cp; + DB_ENV *dbenv; + int allocated, ret; + + dbenv = dbp->dbenv; + allocated = 0; + + /* + * Take one from the free list if it's available. Take only the + * right type. With off page dups we may have different kinds + * of cursors on the queue for a single database. + */ + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (dbc = TAILQ_FIRST(&dbp->free_queue); + dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) + if (dbtype == dbc->dbtype) { + TAILQ_REMOVE(&dbp->free_queue, dbc, links); + dbc->flags = 0; + break; + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + + if (dbc == NULL) { + if ((ret = __os_calloc(dbp->dbenv, 1, sizeof(DBC), &dbc)) != 0) + return (ret); + allocated = 1; + dbc->flags = 0; + + dbc->dbp = dbp; + + /* Set up locking information. */ + if (LOCKING_ON(dbenv)) { + /* + * If we are not threaded, then there is no need to + * create new locker ids. We know that no one else + * is running concurrently using this DB, so we can + * take a peek at any cursors on the active queue. + */ + if (!DB_IS_THREADED(dbp) && + (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) + dbc->lid = adbc->lid; + else + if ((ret = lock_id(dbenv, &dbc->lid)) != 0) + goto err; + + memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN); + if (CDB_LOCKING(dbenv)) { + if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB)) { + /* + * If we are doing a single lock per + * environment, set up the global + * lock object just like we do to + * single thread creates. + */ + DB_ASSERT(sizeof(db_pgno_t) == + sizeof(u_int32_t)); + dbc->lock_dbt.size = sizeof(u_int32_t); + dbc->lock_dbt.data = &dbc->lock.pgno; + dbc->lock.pgno = 0; + } else { + dbc->lock_dbt.size = DB_FILE_ID_LEN; + dbc->lock_dbt.data = dbc->lock.fileid; + } + } else { + dbc->lock.type = DB_PAGE_LOCK; + dbc->lock_dbt.size = sizeof(dbc->lock); + dbc->lock_dbt.data = &dbc->lock; + } + } + /* Init the DBC internal structure. */ + switch (dbtype) { + case DB_BTREE: + case DB_RECNO: + if ((ret = __bam_c_init(dbc, dbtype)) != 0) + goto err; + break; + case DB_HASH: + if ((ret = __ham_c_init(dbc)) != 0) + goto err; + break; + case DB_QUEUE: + if ((ret = __qam_c_init(dbc)) != 0) + goto err; + break; + default: + ret = __db_unknown_type(dbp->dbenv, + "__db_icursor", dbtype); + goto err; + } + + cp = dbc->internal; + } + + /* Refresh the DBC structure. */ + dbc->dbtype = dbtype; + + if ((dbc->txn = txn) == NULL) + dbc->locker = dbc->lid; + else { + dbc->locker = txn->txnid; + txn->cursors++; + } + + if (is_opd) + F_SET(dbc, DBC_OPD); + if (F_ISSET(dbp, DB_AM_RECOVER)) + F_SET(dbc, DBC_RECOVER); + + /* Refresh the DBC internal structure. */ + cp = dbc->internal; + cp->opd = NULL; + + cp->indx = 0; + cp->page = NULL; + cp->pgno = PGNO_INVALID; + cp->root = root; + + switch (dbtype) { + case DB_BTREE: + case DB_RECNO: + if ((ret = __bam_c_refresh(dbc)) != 0) + goto err; + break; + case DB_HASH: + case DB_QUEUE: + break; + default: + ret = __db_unknown_type(dbp->dbenv, "__db_icursor", dbp->type); + goto err; + } + + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links); + F_SET(dbc, DBC_ACTIVE); + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + + *dbcp = dbc; + return (0); + +err: if (allocated) + __os_free(dbc, sizeof(*dbc)); + return (ret); +} + +#ifdef DEBUG +/* + * __db_cprint -- + * Display the current cursor list. + * + * PUBLIC: int __db_cprint __P((DB *)); + */ +int +__db_cprint(dbp) + DB *dbp; +{ + static const FN fn[] = { + { DBC_ACTIVE, "active" }, + { DBC_OPD, "off-page-dup" }, + { DBC_RECOVER, "recover" }, + { DBC_RMW, "read-modify-write" }, + { DBC_WRITECURSOR, "write cursor" }, + { DBC_WRITEDUP, "internally dup'ed write cursor" }, + { DBC_WRITER, "short-term write cursor" }, + { 0, NULL } + }; + DBC *dbc; + DBC_INTERNAL *cp; + char *s; + + MUTEX_THREAD_LOCK(dbp->dbenv, dbp->mutexp); + for (dbc = TAILQ_FIRST(&dbp->active_queue); + dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { + switch (dbc->dbtype) { + case DB_BTREE: + s = "btree"; + break; + case DB_HASH: + s = "hash"; + break; + case DB_RECNO: + s = "recno"; + break; + case DB_QUEUE: + s = "queue"; + break; + default: + DB_ASSERT(0); + return (1); + } + cp = dbc->internal; + fprintf(stderr, "%s/%#0lx: opd: %#0lx\n", + s, P_TO_ULONG(dbc), P_TO_ULONG(cp->opd)); + fprintf(stderr, "\ttxn: %#0lx lid: %lu locker: %lu\n", + P_TO_ULONG(dbc->txn), + (u_long)dbc->lid, (u_long)dbc->locker); + fprintf(stderr, "\troot: %lu page/index: %lu/%lu", + (u_long)cp->root, (u_long)cp->pgno, (u_long)cp->indx); + __db_prflags(dbc->flags, fn, stderr); + fprintf(stderr, "\n"); + + if (dbp->type == DB_BTREE) + __bam_cprint(dbc); + } + for (dbc = TAILQ_FIRST(&dbp->free_queue); + dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) + fprintf(stderr, "free: %#0lx ", P_TO_ULONG(dbc)); + fprintf(stderr, "\n"); + MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); + + return (0); +} +#endif /* DEBUG */ + +/* + * db_fd -- + * Return a file descriptor for flock'ing. + * + * PUBLIC: int __db_fd __P((DB *, int *)); + */ +int +__db_fd(dbp, fdp) + DB *dbp; + int *fdp; +{ + DB_FH *fhp; + int ret; + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd"); + + /* + * XXX + * Truly spectacular layering violation. + */ + if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) != 0) + return (ret); + + if (F_ISSET(fhp, DB_FH_VALID)) { + *fdp = fhp->fd; + return (0); + } else { + *fdp = -1; + __db_err(dbp->dbenv, "DB does not have a valid file handle."); + return (ENOENT); + } +} + +/* + * __db_get -- + * Return a key/data pair. + * + * PUBLIC: int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + */ +int +__db_get(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc; + int mode, ret, t_ret; + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get"); + + if ((ret = __db_getchk(dbp, key, data, flags)) != 0) + return (ret); + + mode = 0; + if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT) + mode = DB_WRITELOCK; + if ((ret = dbp->cursor(dbp, txn, &dbc, mode)) != 0) + return (ret); + + DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags); + + /* + * The DBC_TRANSIENT flag indicates that we're just doing a + * single operation with this cursor, and that in case of + * error we don't need to restore it to its old position--we're + * going to close it right away. Thus, we can perform the get + * without duplicating the cursor, saving some cycles in this + * common case. + */ + F_SET(dbc, DBC_TRANSIENT); + + ret = dbc->c_get(dbc, key, data, + flags == 0 || flags == DB_RMW ? flags | DB_SET : flags); + + if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_put -- + * Store a key/data pair. + * + * PUBLIC: int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + */ +int +__db_put(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc; + DBT tdata; + int ret, t_ret; + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put"); + + if ((ret = __db_putchk(dbp, key, data, + flags, F_ISSET(dbp, DB_AM_RDONLY), + F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) != 0) + return (ret); + + DB_CHECK_TXN(dbp, txn); + + if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) + return (ret); + + /* + * See the comment in __db_get(). + * + * Note that the c_get in the DB_NOOVERWRITE case is safe to + * do with this flag set; if it errors in any way other than + * DB_NOTFOUND, we're going to close the cursor without doing + * anything else, and if it returns DB_NOTFOUND then it's safe + * to do a c_put(DB_KEYLAST) even if an access method moved the + * cursor, since that's not position-dependent. + */ + F_SET(dbc, DBC_TRANSIENT); + + DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags); + + if (flags == DB_NOOVERWRITE) { + flags = 0; + /* + * Set DB_DBT_USERMEM, this might be a threaded application and + * the flags checking will catch us. We don't want the actual + * data, so request a partial of length 0. + */ + memset(&tdata, 0, sizeof(tdata)); + F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL); + + /* + * If we're doing page-level locking, set the read-modify-write + * flag, we're going to overwrite immediately. + */ + if ((ret = dbc->c_get(dbc, key, &tdata, + DB_SET | (STD_LOCKING(dbc) ? DB_RMW : 0))) == 0) + ret = DB_KEYEXIST; + else if (ret == DB_NOTFOUND) + ret = 0; + } + if (ret == 0) + ret = dbc->c_put(dbc, + key, data, flags == 0 ? DB_KEYLAST : flags); + + if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_sync -- + * Flush the database cache. + * + * PUBLIC: int __db_sync __P((DB *, u_int32_t)); + */ +int +__db_sync(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + int ret, t_ret; + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync"); + + if ((ret = __db_syncchk(dbp, flags)) != 0) + return (ret); + + /* Read-only trees never need to be sync'd. */ + if (F_ISSET(dbp, DB_AM_RDONLY)) + return (0); + + /* If it's a Recno tree, write the backing source text file. */ + if (dbp->type == DB_RECNO) + ret = __ram_writeback(dbp); + + /* If the tree was never backed by a database file, we're done. */ + if (F_ISSET(dbp, DB_AM_INMEM)) + return (0); + + /* Flush any dirty pages from the cache to the backing file. */ + if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} |