summary | refs | log | tree | commit | diff
path: root/bdb/btree/bt_open.c
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/btree/bt_open.c')
-rw-r--r-- bdb/btree/bt_open.c 425
1 files changed, 281 insertions, 144 deletions
diff --git a/bdb/btree/bt_open.c b/bdb/btree/bt_open.c
index 405c1880f5e..0b72391c267 100644
--- a/bdb/btree/bt_open.c
+++ b/bdb/btree/bt_open.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Exp $";
+static const char revid[] = "$Id: bt_open.c,v 11.76 2002/09/04 19:06:42 margo Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -54,33 +54,38 @@ static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Ex
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "log.h"
-#include "mp.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/fop.h"
+
+static void __bam_init_meta __P((DB *, BTMETA *, db_pgno_t, DB_LSN *));
/*
* __bam_open --
* Open a btree.
*
- * PUBLIC: int __bam_open __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __bam_open __P((DB *,
+ * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t));
*/
int
-__bam_open(dbp, name, base_pgno, flags)
+__bam_open(dbp, txn, name, base_pgno, flags)
DB *dbp;
+ DB_TXN *txn;
const char *name;
db_pgno_t base_pgno;
u_int32_t flags;
{
BTREE *t;
+ COMPQUIET(name, NULL);
t = dbp->bt_internal;
/* Initialize the remaining fields/methods of the DB. */
- dbp->del = __bam_delete;
dbp->key_range = __bam_key_range;
dbp->stat = __bam_stat;
@@ -99,8 +104,8 @@ __bam_open(dbp, name, base_pgno, flags)
* Verify that the bt_minkey value specified won't cause the
* calculation of ovflsize to underflow [#2406] for this pagesize.
*/
- if (B_MINKEY_TO_OVFLSIZE(t->bt_minkey, dbp->pgsize) >
- B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) {
+ if (B_MINKEY_TO_OVFLSIZE(dbp, t->bt_minkey, dbp->pgsize) >
+ B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
__db_err(dbp->dbenv,
"bt_minkey value of %lu too high for page size of %lu",
(u_long)t->bt_minkey, (u_long)dbp->pgsize);
@@ -108,7 +113,7 @@ __bam_open(dbp, name, base_pgno, flags)
}
/* Start up the tree. */
- return (__bam_read_root(dbp, name, base_pgno, flags));
+ return (__bam_read_root(dbp, txn, base_pgno, flags));
}
/*
@@ -143,6 +148,7 @@ __bam_metachk(dbp, name, btm)
name, (u_long)vers);
return (DB_OLD_VERSION);
case 8:
+ case 9:
break;
default:
__db_err(dbenv,
@@ -187,13 +193,13 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) {
if (dbp->type != DB_BTREE)
goto wrong_type;
- F_SET(dbp, DB_BT_RECNUM);
+ F_SET(dbp, DB_AM_RECNUM);
if ((ret = __db_fcchk(dbenv,
- "DB->open", dbp->flags, DB_AM_DUP, DB_BT_RECNUM)) != 0)
+ "DB->open", dbp->flags, DB_AM_DUP, DB_AM_RECNUM)) != 0)
return (ret);
} else
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
+ if (F_ISSET(dbp, DB_AM_RECNUM)) {
__db_err(dbenv,
"%s: DB_RECNUM specified to open method but not set in database",
name);
@@ -203,9 +209,9 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) {
if (dbp->type != DB_RECNO)
goto wrong_type;
- F_SET(dbp, DB_RE_FIXEDLEN);
+ F_SET(dbp, DB_AM_FIXEDLEN);
} else
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
__db_err(dbenv,
"%s: DB_FIXEDLEN specified to open method but not set in database",
name);
@@ -215,9 +221,9 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) {
if (dbp->type != DB_RECNO)
goto wrong_type;
- F_SET(dbp, DB_RE_RENUMBER);
+ F_SET(dbp, DB_AM_RENUMBER);
} else
- if (F_ISSET(dbp, DB_RE_RENUMBER)) {
+ if (F_ISSET(dbp, DB_AM_RENUMBER)) {
__db_err(dbenv,
"%s: DB_RENUMBER specified to open method but not set in database",
name);
@@ -266,116 +272,129 @@ wrong_type:
/*
* __bam_read_root --
- * Check (and optionally create) a tree.
+ * Read the meta-data page and load the tree's parameters from it.
*
- * PUBLIC: int __bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __bam_read_root __P((DB *, DB_TXN *, db_pgno_t, u_int32_t));
*/
int
-__bam_read_root(dbp, name, base_pgno, flags)
+__bam_read_root(dbp, txn, base_pgno, flags)
DB *dbp;
- const char *name;
+ DB_TXN *txn;
db_pgno_t base_pgno;
u_int32_t flags;
{
BTMETA *meta;
BTREE *t;
DBC *dbc;
- DB_LSN orig_lsn;
DB_LOCK metalock;
- PAGE *root;
- int locked, ret, t_ret;
+ DB_MPOOLFILE *mpf;
+ int ret, t_ret;
- ret = 0;
- t = dbp->bt_internal;
meta = NULL;
- root = NULL;
- locked = 0;
+ t = dbp->bt_internal;
+ LOCK_INIT(metalock);
+ mpf = dbp->mpf;
+ ret = 0;
- /*
- * Get a cursor. If DB_CREATE is specified, we may be creating
- * the root page, and to do that safely in CDB we need a write
- * cursor. In STD_LOCKING mode, we'll synchronize using the
- * meta page lock instead.
- */
- if ((ret = dbp->cursor(dbp, dbp->open_txn,
- &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbp->dbenv) ?
- DB_WRITECURSOR : 0)) != 0)
+ /* Get a cursor; __db_lget and __LPUT below operate through it. */
+ if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
return (ret);
- /* Get, and optionally create the metadata page. */
+ /* Read-lock the metadata page, then fetch it via mpf->get. */
if ((ret =
__db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(
- dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0)
+ if ((ret = mpf->get(mpf, &base_pgno, 0, (PAGE **)&meta)) != 0)
goto err;
/*
- * If the magic number is correct, we're not creating the tree.
- * Correct any fields that may not be right. Note, all of the
- * local flags were set by DB->open.
+ * If the magic number is set, the tree has been created. Correct
+ * any fields that may not be right. Note, all of the local flags
+ * were set by DB->open.
+ *
+ * Otherwise, we'd better be in recovery or abort, in which case the
+ * metadata page will be created/initialized elsewhere.
*/
-again: if (meta->dbmeta.magic != 0) {
- t->bt_maxkey = meta->maxkey;
- t->bt_minkey = meta->minkey;
- t->re_pad = meta->re_pad;
- t->re_len = meta->re_len;
-
- t->bt_meta = base_pgno;
- t->bt_root = meta->root;
-
- (void)memp_fput(dbp->mpf, meta, 0);
- meta = NULL;
- goto done;
- }
+ DB_ASSERT(meta->dbmeta.magic != 0 ||
+ IS_RECOVERING(dbp->dbenv) || F_ISSET(dbp, DB_AM_RECOVER));
- /* In recovery if it's not there it will be created elsewhere.*/
- if (IS_RECOVERING(dbp->dbenv))
- goto done;
-
- /* If we're doing CDB; we now have to get the write lock. */
- if (CDB_LOCKING(dbp->dbenv)) {
- /*
- * We'd better have DB_CREATE set if we're actually doing
- * the create.
- */
- DB_ASSERT(LF_ISSET(DB_CREATE));
- if ((ret = lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
- &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
- goto err;
- }
+ t->bt_maxkey = meta->maxkey;
+ t->bt_minkey = meta->minkey;
+ t->re_pad = meta->re_pad;
+ t->re_len = meta->re_len;
+
+ t->bt_meta = base_pgno;
+ t->bt_root = meta->root;
/*
- * If we are doing locking, relase the read lock and get a write lock.
- * We want to avoid deadlock.
+ * !!!
+ * If creating a subdatabase, we've already done an insert when
+ * we put the subdatabase's entry into the master database, so
+ * our last-page-inserted value is wrongly initialized for the
+ * master database, not the subdatabase we're creating. I'm not
+ * sure where the *right* place to clear this value is, it's not
+ * intuitively obvious that it belongs here.
*/
- if (locked == 0 && STD_LOCKING(dbc)) {
- if ((ret = __LPUT(dbc, metalock)) != 0)
- goto err;
- if ((ret = __db_lget(dbc,
- 0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- locked = 1;
- goto again;
- }
+ t->bt_lpgno = PGNO_INVALID;
+
+ /* last_pgno may be stale: refresh it if writable and at PGNO_BASE_MD. */
+ if (!LF_ISSET(DB_RDONLY) && dbp->meta_pgno == PGNO_BASE_MD) {
+ mpf->last_pgno(mpf, &meta->dbmeta.last_pgno);
+ ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY);
+ } else
+ ret = mpf->put(mpf, meta, 0);
+ meta = NULL;
+
+err: /* Put the page back if still held, then drop the lock and cursor. */
+ if (meta != NULL && (t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __bam_init_meta --
+ *
+ * Initialize a btree meta-data page. The following fields may need
+ * to be updated later: last_pgno, root.
+ */
+static void
+__bam_init_meta(dbp, meta, pgno, lsnp)
+ DB *dbp;
+ BTMETA *meta;
+ db_pgno_t pgno;
+ DB_LSN *lsnp;
+{
+ BTREE *t;
- /* Initialize the tree structure metadata information. */
- orig_lsn = meta->dbmeta.lsn;
memset(meta, 0, sizeof(BTMETA));
- meta->dbmeta.lsn = orig_lsn;
- meta->dbmeta.pgno = base_pgno;
+ meta->dbmeta.lsn = *lsnp;
+ meta->dbmeta.pgno = pgno;
meta->dbmeta.magic = DB_BTREEMAGIC;
meta->dbmeta.version = DB_BTREEVERSION;
meta->dbmeta.pagesize = dbp->pgsize;
+ if (F_ISSET(dbp, DB_AM_CHKSUM))
+ FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM);
+ if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
+ meta->dbmeta.encrypt_alg =
+ ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg;
+ DB_ASSERT(meta->dbmeta.encrypt_alg != 0);
+ meta->crypto_magic = meta->dbmeta.magic;
+ }
meta->dbmeta.type = P_BTREEMETA;
meta->dbmeta.free = PGNO_INVALID;
+ meta->dbmeta.last_pgno = pgno;
if (F_ISSET(dbp, DB_AM_DUP))
F_SET(&meta->dbmeta, BTM_DUP);
- if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN))
F_SET(&meta->dbmeta, BTM_FIXEDLEN);
- if (F_ISSET(dbp, DB_BT_RECNUM))
+ if (F_ISSET(dbp, DB_AM_RECNUM))
F_SET(&meta->dbmeta, BTM_RECNUM);
- if (F_ISSET(dbp, DB_RE_RENUMBER))
+ if (F_ISSET(dbp, DB_AM_RENUMBER))
F_SET(&meta->dbmeta, BTM_RENUMBER);
if (F_ISSET(dbp, DB_AM_SUBDB))
F_SET(&meta->dbmeta, BTM_SUBDB);
@@ -385,14 +404,165 @@ again: if (meta->dbmeta.magic != 0) {
F_SET(&meta->dbmeta, BTM_RECNO);
memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
+ t = dbp->bt_internal;
meta->maxkey = t->bt_maxkey;
meta->minkey = t->bt_minkey;
meta->re_len = t->re_len;
meta->re_pad = t->re_pad;
+}
- /* If necessary, log the meta-data and root page creates. */
- if ((ret = __db_log_page(dbp,
- name, &orig_lsn, base_pgno, (PAGE *)meta)) != 0)
+/*
+ * __bam_new_file --
+ * Create the necessary pages to begin a new database file.
+ *
+ * This code appears more complex than it is because of the two cases (named
+ * and unnamed). For each page being created there are three parts: 1) a "get
+ * page" chunk (malloc'd memory or mpf->get), 2) the initialization, and 3) a
+ * "put page" chunk (a fop write or an mpf->put). Returns 0 on success, else
+ * the first non-zero error from the get/allocation, __db_pgout or the put.
+ *
+ * PUBLIC: int __bam_new_file __P((DB *, DB_TXN *, DB_FH *, const char *));
+ */
+int
+__bam_new_file(dbp, txn, fhp, name)
+ DB *dbp;
+ DB_TXN *txn;
+ DB_FH *fhp;
+ const char *name;
+{
+ BTMETA *meta;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ DB_PGINFO pginfo;
+ DBT pdbt;
+ PAGE *root;
+ db_pgno_t pgno;
+ int ret;
+ void *buf;
+
+ dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
+ root = NULL;
+ meta = NULL;
+ memset(&pdbt, 0, sizeof(pdbt));
+
+ /* Build meta-data page. */
+
+ if (name == NULL) {
+ pgno = PGNO_BASE_MD;
+ ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &meta);
+ } else {
+ pginfo.db_pagesize = dbp->pgsize;
+ pginfo.flags =
+ F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
+ pginfo.type = dbp->type;
+ pdbt.data = &pginfo;
+ pdbt.size = sizeof(pginfo);
+ ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf);
+ meta = (BTMETA *)buf;
+ }
+ if (ret != 0)
+ return (ret);
+
+ LSN_NOT_LOGGED(lsn);
+ __bam_init_meta(dbp, meta, PGNO_BASE_MD, &lsn);
+ meta->root = 1;
+ meta->dbmeta.last_pgno = 1;
+
+ if (name == NULL)
+ ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY);
+ else {
+ if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0)
+ goto err;
+ ret = __fop_write(dbenv,
+ txn, name, DB_APP_DATA, fhp, 0, buf, dbp->pgsize, 1);
+ }
+ if (ret != 0)
+ goto err;
+ meta = NULL;
+
+ /* Now build root page. */
+ if (name == NULL) {
+ pgno = 1;
+ if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0)
+ goto err;
+ } else {
+#ifdef DIAGNOSTIC
+ memset(buf, 0, dbp->pgsize); /* Scrub the written meta page image. */
+#endif
+ root = (PAGE *)buf;
+ }
+
+ P_INIT(root, dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID,
+ LEAFLEVEL, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
+ LSN_NOT_LOGGED(root->lsn);
+
+ if (name == NULL)
+ ret = mpf->put(mpf, root, DB_MPOOL_DIRTY);
+ else {
+ if ((ret = __db_pgout(dbenv, root->pgno, root, &pdbt)) != 0)
+ goto err;
+ ret = __fop_write(dbenv, txn,
+ name, DB_APP_DATA, fhp, dbp->pgsize, buf, dbp->pgsize, 1);
+ }
+ if (ret != 0)
+ goto err;
+ root = NULL;
+
+err: if (name != NULL)
+ __os_free(dbenv, buf);
+ else {
+ if (meta != NULL)
+ (void)mpf->put(mpf, meta, 0);
+ if (root != NULL)
+ (void)mpf->put(mpf, root, 0);
+ }
+ return (ret);
+}
+
+/*
+ * __bam_new_subdb --
+ * Create a metadata page and a root page for a new btree.
+ *
+ * PUBLIC: int __bam_new_subdb __P((DB *, DB *, DB_TXN *));
+ */
+int
+__bam_new_subdb(mdbp, dbp, txn)
+ DB *mdbp, *dbp;
+ DB_TXN *txn;
+{
+ BTMETA *meta;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DB_LOCK metalock;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ PAGE *root;
+ int ret, t_ret;
+
+ dbenv = mdbp->dbenv;
+ mpf = mdbp->mpf;
+ dbc = NULL;
+ meta = NULL;
+ root = NULL;
+
+ if ((ret = mdbp->cursor(mdbp, txn,
+ &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0)
+ return (ret);
+
+ /* Get, and optionally create the metadata page. */
+ if ((ret = __db_lget(dbc,
+ 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+ goto err;
+ if ((ret = mpf->get(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0)
+ goto err;
+
+ /* Build meta-data page. */
+ lsn = meta->dbmeta.lsn;
+ __bam_init_meta(dbp, meta, dbp->meta_pgno, &lsn);
+ if ((ret = __db_log_page(mdbp,
+ txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0)
goto err;
/* Create and initialize a root page. */
@@ -401,68 +571,35 @@ again: if (meta->dbmeta.magic != 0) {
goto err;
root->level = LEAFLEVEL;
- if (dbp->open_txn != NULL && (ret = __bam_root_log(dbp->dbenv,
- dbp->open_txn, &meta->dbmeta.lsn, 0, dbp->log_fileid,
+ if (DBENV_LOGGING(dbenv) &&
+ (ret = __bam_root_log(mdbp, txn, &meta->dbmeta.lsn, 0,
meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0)
goto err;
meta->root = root->pgno;
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name);
- if ((ret = __db_log_page(dbp,
- name, &root->lsn, root->pgno, root)) != 0)
+ if ((ret =
+ __db_log_page(mdbp, txn, &root->lsn, root->pgno, root)) != 0)
goto err;
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
-
- t->bt_meta = base_pgno;
- t->bt_root = root->pgno;
/* Release the metadata and root pages. */
- if ((ret = memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY)) != 0)
goto err;
meta = NULL;
- if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, root, DB_MPOOL_DIRTY)) != 0)
goto err;
root = NULL;
-
- /*
- * Flush the metadata and root pages to disk.
- *
- * !!!
- * It's not useful to return not-yet-flushed here -- convert it to
- * an error.
- */
- if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) {
- __db_err(dbp->dbenv, "Metapage flush failed");
- ret = EINVAL;
- }
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
-
-done: /*
- * !!!
- * We already did an insert and so the last-page-inserted has been
- * set. I'm not sure where the *right* place to clear this value
- * is, it's not intuitively obvious that it belongs here.
- */
- t->bt_lpgno = PGNO_INVALID;
-
err:
-DB_TEST_RECOVERY_LABEL
- /* Put any remaining pages back. */
if (meta != NULL)
- if ((t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0)
ret = t_ret;
if (root != NULL)
- if ((t_ret = memp_fput(dbp->mpf, root, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = mpf->put(mpf, root, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (LOCK_ISSET(metalock))
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (dbc != NULL)
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
-
- /* We can release the metapage lock when we are done. */
- if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
-
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
return (ret);
}