summaryrefslogtreecommitdiff
path: root/bdb/db/db_am.c
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/db/db_am.c')
-rw-r--r--bdb/db/db_am.c511
1 files changed, 511 insertions, 0 deletions
diff --git a/bdb/db/db_am.c b/bdb/db/db_am.c
new file mode 100644
index 00000000000..2d224566904
--- /dev/null
+++ b/bdb/db/db_am.c
@@ -0,0 +1,511 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: db_am.c,v 11.42 2001/01/11 18:19:50 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "btree.h"
+#include "hash.h"
+#include "qam.h"
+#include "lock.h"
+#include "mp.h"
+#include "txn.h"
+#include "db_am.h"
+#include "db_ext.h"
+
+/*
+ * __db_cursor --
+ * Allocate and return a cursor.
+ *
+ * PUBLIC: int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
+ */
+int
+__db_cursor(dbp, txn, dbcp, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ DBC **dbcp;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ DBC *dbc;
+ db_lockmode_t mode;
+ u_int32_t op;
+ int ret;
+
+ dbenv = dbp->dbenv;
+
+ PANIC_CHECK(dbenv);
+ DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
+
+ /* Check for invalid flags. */
+ if ((ret = __db_cursorchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
+ return (ret);
+
+ if ((ret =
+ __db_icursor(dbp, txn, dbp->type, PGNO_INVALID, 0, dbcp)) != 0)
+ return (ret);
+ dbc = *dbcp;
+
+ /*
+ * If this is CDB, do all the locking in the interface, which is
+ * right here.
+ */
+ if (CDB_LOCKING(dbenv)) {
+ op = LF_ISSET(DB_OPFLAGS_MASK);
+ mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
+ ((op == DB_WRITECURSOR) ? DB_LOCK_IWRITE : DB_LOCK_READ);
+ if ((ret = lock_get(dbenv, dbc->locker, 0,
+ &dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
+ (void)__db_c_close(dbc);
+ return (ret);
+ }
+ if (op == DB_WRITECURSOR)
+ F_SET(dbc, DBC_WRITECURSOR);
+ if (op == DB_WRITELOCK)
+ F_SET(dbc, DBC_WRITER);
+ }
+
+ return (0);
+}
+
+/*
+ * __db_icursor --
+ * Internal version of __db_cursor. If dbcp is
+ * non-NULL it is assumed to point to an area to
+ * initialize as a cursor.
+ *
+ * PUBLIC: int __db_icursor
+ * PUBLIC: __P((DB *, DB_TXN *, DBTYPE, db_pgno_t, int, DBC **));
+ */
+int
+__db_icursor(dbp, txn, dbtype, root, is_opd, dbcp)
+ DB *dbp;
+ DB_TXN *txn;
+ DBTYPE dbtype;
+ db_pgno_t root;
+ int is_opd;
+ DBC **dbcp;
+{
+ DBC *dbc, *adbc;
+ DBC_INTERNAL *cp;
+ DB_ENV *dbenv;
+ int allocated, ret;
+
+ dbenv = dbp->dbenv;
+ allocated = 0;
+
+ /*
+ * Take one from the free list if it's available. Take only the
+ * right type. With off page dups we may have different kinds
+ * of cursors on the queue for a single database.
+ */
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&dbp->free_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
+ if (dbtype == dbc->dbtype) {
+ TAILQ_REMOVE(&dbp->free_queue, dbc, links);
+ dbc->flags = 0;
+ break;
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+
+ if (dbc == NULL) {
+ if ((ret = __os_calloc(dbp->dbenv, 1, sizeof(DBC), &dbc)) != 0)
+ return (ret);
+ allocated = 1;
+ dbc->flags = 0;
+
+ dbc->dbp = dbp;
+
+ /* Set up locking information. */
+ if (LOCKING_ON(dbenv)) {
+ /*
+ * If we are not threaded, then there is no need to
+ * create new locker ids. We know that no one else
+ * is running concurrently using this DB, so we can
+ * take a peek at any cursors on the active queue.
+ */
+ if (!DB_IS_THREADED(dbp) &&
+ (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
+ dbc->lid = adbc->lid;
+ else
+ if ((ret = lock_id(dbenv, &dbc->lid)) != 0)
+ goto err;
+
+ memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
+ if (CDB_LOCKING(dbenv)) {
+ if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB)) {
+ /*
+ * If we are doing a single lock per
+ * environment, set up the global
+ * lock object just like we do to
+ * single thread creates.
+ */
+ DB_ASSERT(sizeof(db_pgno_t) ==
+ sizeof(u_int32_t));
+ dbc->lock_dbt.size = sizeof(u_int32_t);
+ dbc->lock_dbt.data = &dbc->lock.pgno;
+ dbc->lock.pgno = 0;
+ } else {
+ dbc->lock_dbt.size = DB_FILE_ID_LEN;
+ dbc->lock_dbt.data = dbc->lock.fileid;
+ }
+ } else {
+ dbc->lock.type = DB_PAGE_LOCK;
+ dbc->lock_dbt.size = sizeof(dbc->lock);
+ dbc->lock_dbt.data = &dbc->lock;
+ }
+ }
+ /* Init the DBC internal structure. */
+ switch (dbtype) {
+ case DB_BTREE:
+ case DB_RECNO:
+ if ((ret = __bam_c_init(dbc, dbtype)) != 0)
+ goto err;
+ break;
+ case DB_HASH:
+ if ((ret = __ham_c_init(dbc)) != 0)
+ goto err;
+ break;
+ case DB_QUEUE:
+ if ((ret = __qam_c_init(dbc)) != 0)
+ goto err;
+ break;
+ default:
+ ret = __db_unknown_type(dbp->dbenv,
+ "__db_icursor", dbtype);
+ goto err;
+ }
+
+ cp = dbc->internal;
+ }
+
+ /* Refresh the DBC structure. */
+ dbc->dbtype = dbtype;
+
+ if ((dbc->txn = txn) == NULL)
+ dbc->locker = dbc->lid;
+ else {
+ dbc->locker = txn->txnid;
+ txn->cursors++;
+ }
+
+ if (is_opd)
+ F_SET(dbc, DBC_OPD);
+ if (F_ISSET(dbp, DB_AM_RECOVER))
+ F_SET(dbc, DBC_RECOVER);
+
+ /* Refresh the DBC internal structure. */
+ cp = dbc->internal;
+ cp->opd = NULL;
+
+ cp->indx = 0;
+ cp->page = NULL;
+ cp->pgno = PGNO_INVALID;
+ cp->root = root;
+
+ switch (dbtype) {
+ case DB_BTREE:
+ case DB_RECNO:
+ if ((ret = __bam_c_refresh(dbc)) != 0)
+ goto err;
+ break;
+ case DB_HASH:
+ case DB_QUEUE:
+ break;
+ default:
+ ret = __db_unknown_type(dbp->dbenv, "__db_icursor", dbp->type);
+ goto err;
+ }
+
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
+ F_SET(dbc, DBC_ACTIVE);
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+
+ *dbcp = dbc;
+ return (0);
+
+err: if (allocated)
+ __os_free(dbc, sizeof(*dbc));
+ return (ret);
+}
+
+#ifdef DEBUG
+/*
+ * __db_cprint --
+ * Display the current cursor list.
+ *
+ * PUBLIC: int __db_cprint __P((DB *));
+ */
+int
+__db_cprint(dbp)
+ DB *dbp;
+{
+ static const FN fn[] = {
+ { DBC_ACTIVE, "active" },
+ { DBC_OPD, "off-page-dup" },
+ { DBC_RECOVER, "recover" },
+ { DBC_RMW, "read-modify-write" },
+ { DBC_WRITECURSOR, "write cursor" },
+ { DBC_WRITEDUP, "internally dup'ed write cursor" },
+ { DBC_WRITER, "short-term write cursor" },
+ { 0, NULL }
+ };
+ DBC *dbc;
+ DBC_INTERNAL *cp;
+ char *s;
+
+ MUTEX_THREAD_LOCK(dbp->dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&dbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ switch (dbc->dbtype) {
+ case DB_BTREE:
+ s = "btree";
+ break;
+ case DB_HASH:
+ s = "hash";
+ break;
+ case DB_RECNO:
+ s = "recno";
+ break;
+ case DB_QUEUE:
+ s = "queue";
+ break;
+ default:
+ DB_ASSERT(0);
+ return (1);
+ }
+ cp = dbc->internal;
+ fprintf(stderr, "%s/%#0lx: opd: %#0lx\n",
+ s, P_TO_ULONG(dbc), P_TO_ULONG(cp->opd));
+ fprintf(stderr, "\ttxn: %#0lx lid: %lu locker: %lu\n",
+ P_TO_ULONG(dbc->txn),
+ (u_long)dbc->lid, (u_long)dbc->locker);
+ fprintf(stderr, "\troot: %lu page/index: %lu/%lu",
+ (u_long)cp->root, (u_long)cp->pgno, (u_long)cp->indx);
+ __db_prflags(dbc->flags, fn, stderr);
+ fprintf(stderr, "\n");
+
+ if (dbp->type == DB_BTREE)
+ __bam_cprint(dbc);
+ }
+ for (dbc = TAILQ_FIRST(&dbp->free_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
+ fprintf(stderr, "free: %#0lx ", P_TO_ULONG(dbc));
+ fprintf(stderr, "\n");
+ MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);
+
+ return (0);
+}
+#endif /* DEBUG */
+
+/*
+ * db_fd --
+ * Return a file descriptor for flock'ing.
+ *
+ * PUBLIC: int __db_fd __P((DB *, int *));
+ */
+int
+__db_fd(dbp, fdp)
+ DB *dbp;
+ int *fdp;
+{
+ DB_FH *fhp;
+ int ret;
+
+ PANIC_CHECK(dbp->dbenv);
+ DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");
+
+ /*
+ * XXX
+ * Truly spectacular layering violation.
+ */
+ if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) != 0)
+ return (ret);
+
+ if (F_ISSET(fhp, DB_FH_VALID)) {
+ *fdp = fhp->fd;
+ return (0);
+ } else {
+ *fdp = -1;
+ __db_err(dbp->dbenv, "DB does not have a valid file handle.");
+ return (ENOENT);
+ }
+}
+
+/*
+ * __db_get --
+ * Return a key/data pair.
+ *
+ * PUBLIC: int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+ */
+int
+__db_get(dbp, txn, key, data, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ DBT *key, *data;
+ u_int32_t flags;
+{
+ DBC *dbc;
+ int mode, ret, t_ret;
+
+ PANIC_CHECK(dbp->dbenv);
+ DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");
+
+ if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
+ return (ret);
+
+ mode = 0;
+ if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT)
+ mode = DB_WRITELOCK;
+ if ((ret = dbp->cursor(dbp, txn, &dbc, mode)) != 0)
+ return (ret);
+
+ DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags);
+
+ /*
+ * The DBC_TRANSIENT flag indicates that we're just doing a
+ * single operation with this cursor, and that in case of
+ * error we don't need to restore it to its old position--we're
+ * going to close it right away. Thus, we can perform the get
+ * without duplicating the cursor, saving some cycles in this
+ * common case.
+ */
+ F_SET(dbc, DBC_TRANSIENT);
+
+ ret = dbc->c_get(dbc, key, data,
+ flags == 0 || flags == DB_RMW ? flags | DB_SET : flags);
+
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __db_put --
+ * Store a key/data pair.
+ *
+ * PUBLIC: int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+ */
+int
+__db_put(dbp, txn, key, data, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ DBT *key, *data;
+ u_int32_t flags;
+{
+ DBC *dbc;
+ DBT tdata;
+ int ret, t_ret;
+
+ PANIC_CHECK(dbp->dbenv);
+ DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");
+
+ if ((ret = __db_putchk(dbp, key, data,
+ flags, F_ISSET(dbp, DB_AM_RDONLY),
+ F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) != 0)
+ return (ret);
+
+ DB_CHECK_TXN(dbp, txn);
+
+ if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
+ return (ret);
+
+ /*
+ * See the comment in __db_get().
+ *
+ * Note that the c_get in the DB_NOOVERWRITE case is safe to
+ * do with this flag set; if it errors in any way other than
+ * DB_NOTFOUND, we're going to close the cursor without doing
+ * anything else, and if it returns DB_NOTFOUND then it's safe
+ * to do a c_put(DB_KEYLAST) even if an access method moved the
+ * cursor, since that's not position-dependent.
+ */
+ F_SET(dbc, DBC_TRANSIENT);
+
+ DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags);
+
+ if (flags == DB_NOOVERWRITE) {
+ flags = 0;
+ /*
+ * Set DB_DBT_USERMEM, this might be a threaded application and
+ * the flags checking will catch us. We don't want the actual
+ * data, so request a partial of length 0.
+ */
+ memset(&tdata, 0, sizeof(tdata));
+ F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
+
+ /*
+ * If we're doing page-level locking, set the read-modify-write
+ * flag, we're going to overwrite immediately.
+ */
+ if ((ret = dbc->c_get(dbc, key, &tdata,
+ DB_SET | (STD_LOCKING(dbc) ? DB_RMW : 0))) == 0)
+ ret = DB_KEYEXIST;
+ else if (ret == DB_NOTFOUND)
+ ret = 0;
+ }
+ if (ret == 0)
+ ret = dbc->c_put(dbc,
+ key, data, flags == 0 ? DB_KEYLAST : flags);
+
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __db_sync --
+ * Flush the database cache.
+ *
+ * PUBLIC: int __db_sync __P((DB *, u_int32_t));
+ */
+int
+__db_sync(dbp, flags)
+ DB *dbp;
+ u_int32_t flags;
+{
+ int ret, t_ret;
+
+ PANIC_CHECK(dbp->dbenv);
+ DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");
+
+ if ((ret = __db_syncchk(dbp, flags)) != 0)
+ return (ret);
+
+ /* Read-only trees never need to be sync'd. */
+ if (F_ISSET(dbp, DB_AM_RDONLY))
+ return (0);
+
+ /* If it's a Recno tree, write the backing source text file. */
+ if (dbp->type == DB_RECNO)
+ ret = __ram_writeback(dbp);
+
+ /* If the tree was never backed by a database file, we're done. */
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ return (0);
+
+ /* Flush any dirty pages from the cache to the backing file. */
+ if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}