summaryrefslogtreecommitdiff
path: root/storage/bdb/btree/bt_curadj.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/bdb/btree/bt_curadj.c')
-rw-r--r--storage/bdb/btree/bt_curadj.c582
1 files changed, 582 insertions, 0 deletions
diff --git a/storage/bdb/btree/bt_curadj.c b/storage/bdb/btree/bt_curadj.c
new file mode 100644
index 00000000000..50d3d422e49
--- /dev/null
+++ b/storage/bdb/btree/bt_curadj.c
@@ -0,0 +1,582 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2002
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: bt_curadj.c,v 11.30 2002/07/03 19:03:48 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+
+static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
+
+#ifdef DEBUG
+/*
+ * __bam_cprint --
+ * Display the current internal cursor.
+ *
+ * PUBLIC: void __bam_cprint __P((DBC *));
+ */
+void
+__bam_cprint(dbc)
+ DBC *dbc;
+{
+ BTREE_CURSOR *cp;
+
+ cp = (BTREE_CURSOR *)dbc->internal;
+
+ fprintf(stderr, "\tinternal: ovflsize: %lu", (u_long)cp->ovflsize);
+ if (dbc->dbtype == DB_RECNO)
+ fprintf(stderr, " recno: %lu", (u_long)cp->recno);
+ if (F_ISSET(cp, C_DELETED))
+ fprintf(stderr, " (deleted)");
+ fprintf(stderr, "\n");
+}
+#endif
+
+/*
+ * Cursor adjustments are logged if they are for subtransactions. This is
+ * because it's possible for a subtransaction to adjust cursors which will
+ * still be active after the subtransaction aborts, and so which must be
+ * restored to their previous locations. Cursors that can be both affected
+ * by our cursor adjustments and active after our transaction aborts can
+ * only be found in our parent transaction -- cursors in other transactions,
+ * including other child transactions of our parent, must have conflicting
+ * locker IDs, and so cannot be affected by adjustments in this transaction.
+ */
+
+/*
+ * __bam_ca_delete --
+ * Update the cursors when items are deleted and when already deleted
+ * items are overwritten. Return the number of relevant cursors found.
+ *
+ * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int));
+ */
+int
+__bam_ca_delete(dbp, pgno, indx, delete)
+ DB *dbp;
+ db_pgno_t pgno;
+ u_int32_t indx;
+ int delete;
+{
+ BTREE_CURSOR *cp;
+ DB *ldbp;
+ DB_ENV *dbenv;
+ DBC *dbc;
+ int count; /* !!!: Has to contain max number of cursors. */
+
+ dbenv = dbp->dbenv;
+
+ /*
+ * Adjust the cursors. We have the page write locked, so the
+ * only other cursors that can be pointing at a page are
+ * those in the same thread of control. Unfortunately, we don't
+ * know that they're using the same DB handle, so traverse
+ * all matching DB handles in the same DB_ENV, then all cursors
+ * on each matching DB handle.
+ *
+ * Each cursor is single-threaded, so we only need to lock the
+ * list of DBs and then the list of cursors in each DB.
+ */
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (count = 0, ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ cp = (BTREE_CURSOR *)dbc->internal;
+ if (cp->pgno == pgno && cp->indx == indx) {
+ if (delete)
+ F_SET(cp, C_DELETED);
+ else
+ F_CLR(cp, C_DELETED);
+ ++count;
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ return (count);
+}
+
+/*
+ * __ram_ca_delete --
+ * Return the number of relevant cursors.
+ *
+ * PUBLIC: int __ram_ca_delete __P((DB *, db_pgno_t));
+ */
+int
+__ram_ca_delete(dbp, root_pgno)
+ DB *dbp;
+ db_pgno_t root_pgno;
+{
+ DB *ldbp;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ int found;
+
+ found = 0;
+ dbenv = dbp->dbenv;
+
+ /*
+ * Review the cursors. See the comment in __bam_ca_delete().
+ */
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ found == 0 && ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ found == 0 && dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
+ if (dbc->internal->root == root_pgno)
+ found = 1;
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+ return (found);
+}
+
+/*
+ * __bam_ca_di --
+ * Adjust the cursors during a delete or insert.
+ *
+ * PUBLIC: int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int));
+ */
+int
+__bam_ca_di(my_dbc, pgno, indx, adjust)
+ DBC *my_dbc;
+ db_pgno_t pgno;
+ u_int32_t indx;
+ int adjust;
+{
+ DB *dbp, *ldbp;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_TXN *my_txn;
+ DBC *dbc;
+ DBC_INTERNAL *cp;
+ int found, ret;
+
+ dbp = my_dbc->dbp;
+ dbenv = dbp->dbenv;
+
+ my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
+
+ /*
+ * Adjust the cursors. See the comment in __bam_ca_delete().
+ */
+ found = 0;
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ if (dbc->dbtype == DB_RECNO)
+ continue;
+ cp = dbc->internal;
+ if (cp->pgno == pgno && cp->indx >= indx) {
+ /* Cursor indices should never be negative. */
+ DB_ASSERT(cp->indx != 0 || adjust > 0);
+
+ cp->indx += adjust;
+ if (my_txn != NULL && dbc->txn != my_txn)
+ found = 1;
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+/*
+ * __bam_opd_cursor -- create a new opd cursor.
+ */
+static int
+__bam_opd_cursor(dbp, dbc, first, tpgno, ti)
+ DB *dbp;
+ DBC *dbc;
+ db_pgno_t tpgno;
+ u_int32_t first, ti;
+{
+ BTREE_CURSOR *cp, *orig_cp;
+ DBC *dbc_nopd;
+ int ret;
+
+ orig_cp = (BTREE_CURSOR *)dbc->internal;
+ dbc_nopd = NULL;
+
+ /*
+ * Allocate a new cursor and create the stack. If duplicates
+ * are sorted, we've just created an off-page duplicate Btree.
+ * If duplicates aren't sorted, we've just created a Recno tree.
+ *
+ * Note that in order to get here at all, there shouldn't be
+ * an old off-page dup cursor--to augment the checking db_c_newopd
+ * will do, assert this.
+ */
+ DB_ASSERT(orig_cp->opd == NULL);
+ if ((ret = __db_c_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
+ return (ret);
+
+ cp = (BTREE_CURSOR *)dbc_nopd->internal;
+ cp->pgno = tpgno;
+ cp->indx = ti;
+
+ if (dbp->dup_compare == NULL) {
+ /*
+ * Converting to off-page Recno trees is tricky. The
+ * record number for the cursor is the index + 1 (to
+ * convert to 1-based record numbers).
+ */
+ cp->recno = ti + 1;
+ }
+
+ /*
+ * Transfer the deleted flag from the top-level cursor to the
+ * created one.
+ */
+ if (F_ISSET(orig_cp, C_DELETED)) {
+ F_SET(cp, C_DELETED);
+ F_CLR(orig_cp, C_DELETED);
+ }
+
+ /* Stack the cursors and reset the initial cursor's index. */
+ orig_cp->opd = dbc_nopd;
+ orig_cp->indx = first;
+ return (0);
+}
+
+/*
+ * __bam_ca_dup --
+ * Adjust the cursors when moving items from a leaf page to a duplicates
+ * page.
+ *
+ * PUBLIC: int __bam_ca_dup __P((DBC *,
+ * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t));
+ */
+int
+__bam_ca_dup(my_dbc, first, fpgno, fi, tpgno, ti)
+ DBC *my_dbc;
+ db_pgno_t fpgno, tpgno;
+ u_int32_t first, fi, ti;
+{
+ BTREE_CURSOR *orig_cp;
+ DB *dbp, *ldbp;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_TXN *my_txn;
+ int found, ret;
+
+ dbp = my_dbc->dbp;
+ dbenv = dbp->dbenv;
+ my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
+
+ /*
+ * Adjust the cursors. See the comment in __bam_ca_delete().
+ */
+ found = 0;
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ /* Find cursors pointing to this record. */
+ orig_cp = (BTREE_CURSOR *)dbc->internal;
+ if (orig_cp->pgno != fpgno || orig_cp->indx != fi)
+ continue;
+
+ /*
+ * Since we rescan the list see if this is already
+ * converted.
+ */
+ if (orig_cp->opd != NULL)
+ continue;
+
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ if ((ret = __bam_opd_cursor(dbp,
+ dbc, first, tpgno, ti)) !=0)
+ return (ret);
+ if (my_txn != NULL && dbc->txn != my_txn)
+ found = 1;
+ /* We released the mutex to get a cursor, start over. */
+ goto loop;
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
+ return (ret);
+ }
+ return (0);
+}
+
+/*
+ * __bam_ca_undodup --
+ * Adjust the cursors when returning items to a leaf page
+ * from a duplicate page.
+ * Called only during undo processing.
+ *
+ * PUBLIC: int __bam_ca_undodup __P((DB *,
+ * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, u_int32_t));
+ */
+int
+__bam_ca_undodup(dbp, first, fpgno, fi, ti)
+ DB *dbp;
+ db_pgno_t fpgno;
+ u_int32_t first, fi, ti;
+{
+ BTREE_CURSOR *orig_cp;
+ DB *ldbp;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ int ret;
+
+ dbenv = dbp->dbenv;
+
+ /*
+ * Adjust the cursors. See the comment in __bam_ca_delete().
+ */
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ orig_cp = (BTREE_CURSOR *)dbc->internal;
+
+ /*
+ * A note on the orig_cp->opd != NULL requirement here:
+ * it's possible that there's a cursor that refers to
+ * the same duplicate set, but which has no opd cursor,
+ * because it refers to a different item and we took
+ * care of it while processing a previous record.
+ */
+ if (orig_cp->pgno != fpgno ||
+ orig_cp->indx != first ||
+ orig_cp->opd == NULL ||
+ ((BTREE_CURSOR *)orig_cp->opd->internal)->indx
+ != ti)
+ continue;
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ if ((ret = orig_cp->opd->c_close(orig_cp->opd)) != 0)
+ return (ret);
+ orig_cp->opd = NULL;
+ orig_cp->indx = fi;
+ /*
+ * We released the mutex to free a cursor,
+ * start over.
+ */
+ goto loop;
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ return (0);
+}
+
+/*
+ * __bam_ca_rsplit --
+ * Adjust the cursors when doing reverse splits.
+ *
+ * PUBLIC: int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t));
+ */
+int
+__bam_ca_rsplit(my_dbc, fpgno, tpgno)
+ DBC* my_dbc;
+ db_pgno_t fpgno, tpgno;
+{
+ DB *dbp, *ldbp;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_TXN *my_txn;
+ int found, ret;
+
+ dbp = my_dbc->dbp;
+ dbenv = dbp->dbenv;
+ my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
+
+ /*
+ * Adjust the cursors. See the comment in __bam_ca_delete().
+ */
+ found = 0;
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ if (dbc->dbtype == DB_RECNO)
+ continue;
+ if (dbc->internal->pgno == fpgno) {
+ dbc->internal->pgno = tpgno;
+ if (my_txn != NULL && dbc->txn != my_txn)
+ found = 1;
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
+ return (ret);
+ }
+ return (0);
+}
+
+/*
+ * __bam_ca_split --
+ * Adjust the cursors when splitting a page.
+ *
+ * PUBLIC: int __bam_ca_split __P((DBC *,
+ * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
+ */
+int
+__bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
+ DBC *my_dbc;
+ db_pgno_t ppgno, lpgno, rpgno;
+ u_int32_t split_indx;
+ int cleft;
+{
+ DB *dbp, *ldbp;
+ DBC *dbc;
+ DBC_INTERNAL *cp;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_TXN *my_txn;
+ int found, ret;
+
+ dbp = my_dbc->dbp;
+ dbenv = dbp->dbenv;
+ my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
+
+ /*
+ * Adjust the cursors. See the comment in __bam_ca_delete().
+ *
+ * If splitting the page that a cursor was on, the cursor has to be
+ * adjusted to point to the same record as before the split. Most
+ * of the time we don't adjust pointers to the left page, because
+ * we're going to copy its contents back over the original page. If
+ * the cursor is on the right page, it is decremented by the number of
+ * records split to the left page.
+ */
+ found = 0;
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ if (dbc->dbtype == DB_RECNO)
+ continue;
+ cp = dbc->internal;
+ if (cp->pgno == ppgno) {
+ if (my_txn != NULL && dbc->txn != my_txn)
+ found = 1;
+ if (cp->indx < split_indx) {
+ if (cleft)
+ cp->pgno = lpgno;
+ } else {
+ cp->pgno = rpgno;
+ cp->indx -= split_indx;
+ }
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp,
+ my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno,
+ cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+/*
+ * __bam_ca_undosplit --
+ * Adjust the cursors when undoing a split of a page.
+ * If we grew a level we will execute this for both the
+ * left and the right pages.
+ * Called only during undo processing.
+ *
+ * PUBLIC: void __bam_ca_undosplit __P((DB *,
+ * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t));
+ */
+void
+__bam_ca_undosplit(dbp, frompgno, topgno, lpgno, split_indx)
+ DB *dbp;
+ db_pgno_t frompgno, topgno, lpgno;
+ u_int32_t split_indx;
+{
+ DB *ldbp;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DBC_INTERNAL *cp;
+
+ dbenv = dbp->dbenv;
+
+ /*
+ * Adjust the cursors. See the comment in __bam_ca_delete().
+ *
+ * When backing out a split, we move the cursor back
+ * to the original offset and bump it by the split_indx.
+ */
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (dbc = TAILQ_FIRST(&ldbp->active_queue);
+ dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+ if (dbc->dbtype == DB_RECNO)
+ continue;
+ cp = dbc->internal;
+ if (cp->pgno == topgno) {
+ cp->pgno = frompgno;
+ cp->indx += split_indx;
+ } else if (cp->pgno == lpgno)
+ cp->pgno = frompgno;
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+}