Diffstat (limited to 'src/db/db_vrfy.c')
-rw-r--r--    src/db/db_vrfy.c    3055
1 file changed, 3055 insertions, 0 deletions
diff --git a/src/db/db_vrfy.c b/src/db/db_vrfy.c
new file mode 100644
index 00000000..9cb94ad2
--- /dev/null
+++ b/src/db/db_vrfy.c
@@ -0,0 +1,3055 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/db_verify.h"
+#include "dbinc/btree.h"
+#include "dbinc/fop.h"
+#include "dbinc/hash.h"
+#include "dbinc/heap.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+#include "dbinc/qam.h"
+#include "dbinc/txn.h"
+
+/*
+ * This is the code for DB->verify, the DB database consistency checker.
+ * For now, it checks all subdatabases in a database, and verifies
+ * everything it knows how to (i.e. it's all-or-nothing, and one can't
+ * check only for a subset of possible problems).
+ */
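+
+/*
+ * A minimal usage sketch (illustrative only; "access.db" is a made-up
+ * file name): applications reach this code through the public
+ * DB->verify method, which resolves to __db_verify_pp below.  Note
+ * that the handle is destroyed by the call, whether or not the
+ * verification succeeds.
+ *
+ *	DB *dbp;
+ *	int ret;
+ *
+ *	if ((ret = db_create(&dbp, NULL, 0)) != 0)
+ *		return (ret);
+ *	ret = dbp->verify(dbp, "access.db", NULL, stdout, 0);
+ *	(The dbp handle may not be used again after this call.)
+ */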
+
+static u_int __db_guesspgsize __P((ENV *, DB_FH *));
+static int __db_is_valid_magicno __P((u_int32_t, DBTYPE *));
+static int __db_meta2pgset
+ __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *));
+static int __db_salvage __P((DB *, VRFY_DBINFO *,
+ db_pgno_t, void *, int (*)(void *, const void *), u_int32_t));
+static int __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *,
+ PAGE *, void *, int (*)(void *, const void *), u_int32_t));
+static int __db_salvage_all __P((DB *, VRFY_DBINFO *, void *,
+ int(*)(void *, const void *), u_int32_t, int *));
+static int __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *,
+ int (*)(void *, const void *), u_int32_t));
+static int __db_verify_arg __P((DB *, const char *, void *, u_int32_t));
+static int __db_vrfy_freelist
+ __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
+static int __db_vrfy_getpagezero
+ __P((DB *, DB_FH *, const char *, u_int8_t *, u_int32_t));
+static int __db_vrfy_invalid
+ __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
+static int __db_vrfy_orderchkonly __P((DB *,
+ VRFY_DBINFO *, const char *, const char *, u_int32_t));
+static int __db_vrfy_pagezero __P((DB *,
+ VRFY_DBINFO *, DB_FH *, const char *, u_int32_t));
+static int __db_vrfy_subdbs
+ __P((DB *, VRFY_DBINFO *, const char *, u_int32_t));
+static int __db_vrfy_structure __P((DB *, VRFY_DBINFO *,
+ const char *, db_pgno_t, void *, void *, u_int32_t));
+static int __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *,
+ void *, int (*)(void *, const void *), u_int32_t));
+
+#define VERIFY_FLAGS \
+ (DB_AGGRESSIVE | \
+ DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF)
+
+/*
+ * __db_verify_pp --
+ * DB->verify public interface.
+ *
+ * PUBLIC: int __db_verify_pp
+ * PUBLIC: __P((DB *, const char *, const char *, FILE *, u_int32_t));
+ */
+int
+__db_verify_pp(dbp, file, database, outfile, flags)
+ DB *dbp;
+ const char *file, *database;
+ FILE *outfile;
+ u_int32_t flags;
+{
+ /*
+ * __db_verify_pp is a wrapper to __db_verify_internal, which lets
+ * us pass appropriate equivalents to FILE * in from the non-C APIs.
+ * That's why the usual ENV_ENTER macros are in __db_verify_internal,
+ * not here.
+ */
+ return (__db_verify_internal(dbp,
+ file, database, outfile, __db_pr_callback, flags));
+}
+
+/*
+ * __db_verify_internal --
+ *
+ * PUBLIC: int __db_verify_internal __P((DB *, const char *,
+ * PUBLIC: const char *, void *, int (*)(void *, const void *), u_int32_t));
+ */
+int
+__db_verify_internal(dbp, fname, dname, handle, callback, flags)
+ DB *dbp;
+ const char *fname, *dname;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int ret, t_ret;
+
+ env = dbp->env;
+
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify");
+
+ if (!LF_ISSET(DB_SALVAGE))
+ LF_SET(DB_UNREF);
+
+ ENV_ENTER(env, ip);
+
+ if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0)
+ ret = __db_verify(dbp, ip,
+ fname, dname, handle, callback, NULL, NULL, flags);
+
+ /* Db.verify is a DB handle destructor. */
+ if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+
+/*
+ * __db_verify_arg --
+ * Check DB->verify arguments.
+ */
+static int
+__db_verify_arg(dbp, dname, handle, flags)
+ DB *dbp;
+ const char *dname;
+ void *handle;
+ u_int32_t flags;
+{
+ ENV *env;
+ int ret;
+
+ env = dbp->env;
+
+ if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0)
+ return (ret);
+
+ /*
+ * DB_SALVAGE is mutually exclusive with the other flags except
+ * DB_AGGRESSIVE, DB_PRINTABLE.
+ *
+ * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging.
+ *
+ * DB_SALVAGE requires an output stream.
+ */
+ if (LF_ISSET(DB_SALVAGE)) {
+ if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE)))
+ return (__db_ferr(env, "DB->verify", 1));
+ if (handle == NULL) {
+ __db_errx(env, DB_STR("0518",
+	    "DB_SALVAGE requires an output handle"));
+ return (EINVAL);
+ }
+ } else
+ if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE))
+ return (__db_ferr(env, "DB->verify", 1));
+
+ /*
+ * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and
+ * DB_NOORDERCHK, and requires a database name.
+ */
+ if ((ret = __db_fcchk(env, "DB->verify", flags,
+ DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0)
+ return (ret);
+ if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) {
+ __db_errx(env, DB_STR("0519",
+ "DB_ORDERCHKONLY requires a database name"));
+ return (EINVAL);
+ }
+ return (0);
+}
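+
+/*
+ * Illustrative flag combinations for the checks above (a sketch, not
+ * an exhaustive list):
+ *
+ *	DB_SALVAGE | DB_AGGRESSIVE | DB_PRINTABLE  accepted; handle required
+ *	DB_SALVAGE | DB_NOORDERCHK                 rejected (__db_ferr)
+ *	DB_AGGRESSIVE without DB_SALVAGE           rejected (__db_ferr)
+ *	DB_ORDERCHKONLY | DB_NOORDERCHK            rejected (__db_fcchk)
+ *	DB_ORDERCHKONLY with dname == NULL         rejected (EINVAL)
+ */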
+
+/*
+ * __db_verify --
+ * Walk the entire file page-by-page, either verifying with or without
+ * dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data
+ * pairs can be found and dumping them in standard (db_load-ready)
+ * dump format.
+ *
+ * (Salvaging isn't really a verification operation, but we put it
+ * here anyway because it requires essentially identical top-level
+ * code.)
+ *
+ * flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE
+ * (and optionally DB_AGGRESSIVE).
+ * PUBLIC: int __db_verify __P((DB *, DB_THREAD_INFO *, const char *,
+ * PUBLIC: const char *, void *, int (*)(void *, const void *),
+ * PUBLIC: void *, void *, u_int32_t));
+ */
+int
+__db_verify(dbp, ip, name, subdb, handle, callback, lp, rp, flags)
+ DB *dbp;
+ DB_THREAD_INFO *ip;
+ const char *name, *subdb;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ void *lp, *rp;
+ u_int32_t flags;
+{
+ DB_FH *fhp;
+ ENV *env;
+ VRFY_DBINFO *vdp;
+ u_int32_t sflags;
+ int has_subdbs, isbad, ret, t_ret;
+ char *real_name;
+
+ env = dbp->env;
+ fhp = NULL;
+ vdp = NULL;
+ real_name = NULL;
+ has_subdbs = isbad = ret = t_ret = 0;
+
+ F_SET(dbp, DB_AM_VERIFYING);
+
+ /* Initialize any feedback function. */
+ if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
+ dbp->db_feedback(dbp, DB_VERIFY, 0);
+
+ /*
+ * We don't know how large the cache is, and if the database
+ * in question uses a small page size--which we don't know
+ * yet!--it may be uncomfortably small for the default page
+ * size [#2143]. However, the things we need temporary
+ * databases for in dbinfo are largely tiny, so using a
+ * 1024-byte pagesize is probably not going to be a big hit,
+ * and will make us fit better into small spaces.
+ */
+ if ((ret = __db_vrfy_dbinfo_create(env, ip, 1024, &vdp)) != 0)
+ goto err;
+
+ /*
+ * Note whether the user has requested that we use printable
+ * chars where possible. We won't get here with this flag if
+ * we're not salvaging.
+ */
+ if (LF_ISSET(DB_PRINTABLE))
+ F_SET(vdp, SALVAGE_PRINTABLE);
+
+ if (name != NULL) {
+ /* Find the real name of the file. */
+ if ((ret = __db_appname(env,
+ DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
+ goto err;
+
+ /*
+ * Our first order of business is to verify page 0, which is the
+ * metadata page for the master database of subdatabases or of
+ * the only database in the file. We want to do this by hand
+ * rather than just calling __db_open in case it's
+ * corrupt--various things in __db_open might act funny.
+ *
+ * Once we know the metadata page is healthy, I believe that
+ * it's safe to open the database normally and then use the page
+ * swapping code, which makes life easier.
+ */
+ if ((ret = __os_open(env,
+ real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
+ goto err;
+ } else {
+ MAKE_INMEM(dbp);
+ }
+
+ /* Verify the metadata page 0; set pagesize and type. */
+ if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, subdb, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+
+ /*
+ * We can assume at this point that dbp->pagesize and dbp->type are
+ * set correctly, or at least as well as they can be, and that
+ * locking, logging, and txns are not in use. Thus we can trust
+ * the memp code not to look at the page, and thus to be safe
+ * enough to use.
+ *
+ * The dbp is not open, but the file is open in the fhp, and we
+ * cannot assume that __db_open is safe. Call __env_setup,
+ * the [safe] part of __db_open that initializes the environment--
+ * and the mpool--manually.
+ */
+ if ((ret = __env_setup(dbp, NULL,
+ name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0)
+ goto err;
+
+ /*
+ * Set our name in the Queue subsystem; we may need it later
+ * to deal with extents. In-memory databases are not allowed to have
+ * extents.
+ */
+ if (dbp->type == DB_QUEUE && name != NULL &&
+ (ret = __qam_set_ext_data(dbp, name)) != 0)
+ goto err;
+
+ /* Mark the dbp as opened, so that we correctly handle its close. */
+ F_SET(dbp, DB_AM_OPEN_CALLED);
+
+ /*
+ * Find out the page number of the last page in the database. We'll
+ * use this later to verify the metadata page. We don't verify now
+ * because the data from __db_vrfy_pagezero could be stale.
+ */
+ if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0)
+ goto err;
+ /*
+ * DB_ORDERCHKONLY is a special case; our file consists of
+ * several subdatabases, which use different hash, bt_compare,
+ * and/or dup_compare functions. Consequently, we couldn't verify
+ * sorting and hashing simply by calling DB->verify() on the file.
+ * DB_ORDERCHKONLY allows us to come back and check those things; it
+ * requires a subdatabase, and assumes that everything but that
+ * database's sorting/hashing is correct.
+ */
+ if (LF_ISSET(DB_ORDERCHKONLY)) {
+ ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
+ goto done;
+ }
+
+ sflags = flags;
+ if (dbp->p_internal != NULL)
+ LF_CLR(DB_SALVAGE);
+
+ /*
+ * When salvaging, we use a db to keep track of whether we've seen a
+ * given overflow or dup page in the course of traversing normal data.
+ * If in the end we have not, we assume its key got lost and print it
+ * with key "UNKNOWN".
+ */
+ if (LF_ISSET(DB_SALVAGE)) {
+ if ((ret = __db_salvage_init(vdp)) != 0)
+ goto err;
+
+ /*
+ * If we're not being aggressive, salvage by walking the tree
+ * and only printing the leaves we find. "has_subdbs" will
+ * indicate whether we found subdatabases.
+ */
+ if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_all(
+ dbp, vdp, handle, callback, flags, &has_subdbs) != 0)
+ isbad = 1;
+
+ /*
+ * If we have subdatabases, flag if any keys are found that
+ * don't belong to a subdatabase -- they'll need to have an
+ * "__OTHER__" subdatabase header printed first.
+ */
+ if (has_subdbs) {
+ F_SET(vdp, SALVAGE_PRINTHEADER);
+ F_SET(vdp, SALVAGE_HASSUBDBS);
+ }
+ }
+
+	/*
+	 * Walk all the pages; a page that cannot be read (DB_PAGE_NOTFOUND)
+	 * does not stop the structure checks below.
+	 */
+ if ((ret =
+ __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else if (ret != DB_PAGE_NOTFOUND)
+ goto err;
+ }
+
+ /* If we're verifying, verify inter-page structure. */
+ if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
+ if ((t_ret = __db_vrfy_structure(dbp,
+ vdp, name, 0, lp, rp, flags)) != 0) {
+ if (t_ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+
+ /*
+ * If we're salvaging, output with key UNKNOWN any overflow or dup pages
+ * we haven't been able to put in context. Then destroy the salvager's
+ * state-saving database.
+ */
+ if (LF_ISSET(DB_SALVAGE)) {
+ if ((ret = __db_salvage_unknowns(dbp,
+ vdp, handle, callback, flags)) != 0)
+ isbad = 1;
+ }
+
+ flags = sflags;
+
+#ifdef HAVE_PARTITION
+ if (t_ret == 0 && dbp->p_internal != NULL)
+ t_ret = __part_verify(dbp, vdp, name, handle, callback, flags);
+#endif
+
+ if (ret == 0)
+ ret = t_ret;
+
+ /* Don't display a footer for a database holding other databases. */
+ if (LF_ISSET(DB_SALVAGE | DB_VERIFY_PARTITION) == DB_SALVAGE &&
+ (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
+ (void)__db_prfooter(handle, callback);
+
+done: err:
+ /* Send feedback that we're done. */
+ if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
+ dbp->db_feedback(dbp, DB_VERIFY, 100);
+
+ if (LF_ISSET(DB_SALVAGE) &&
+ (t_ret = __db_salvage_destroy(vdp)) != 0 && ret == 0)
+ ret = t_ret;
+ if (fhp != NULL &&
+ (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
+ ret = t_ret;
+ if (vdp != NULL &&
+ (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0)
+ ret = t_ret;
+ if (real_name != NULL)
+ __os_free(env, real_name);
+
+ /*
+ * DB_VERIFY_FATAL is a private error, translate to a public one.
+ *
+	 * If we didn't find a page, a page number was probably corrupted.
+ * Return the standard corruption error.
+ *
+ * Otherwise, if we found corruption along the way, set the return.
+ */
+ if (ret == DB_VERIFY_FATAL ||
+ ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1))
+ ret = DB_VERIFY_BAD;
+
+ /* Make sure there's a public complaint if we found corruption. */
+ if (ret != 0)
+ __db_err(env, ret, "%s", name);
+
+ return (ret);
+}
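+
+/*
+ * A salvage usage sketch (illustrative; "broken.db" and "salvaged.txt"
+ * are assumptions): dumping whatever key/data pairs can be recovered,
+ * in db_load-ready format, goes through the same entry point with
+ * DB_SALVAGE set.
+ *
+ *	DB *dbp;
+ *	FILE *ofp;
+ *
+ *	if ((ofp = fopen("salvaged.txt", "w")) == NULL)
+ *		return;
+ *	if (db_create(&dbp, NULL, 0) == 0)
+ *		(void)dbp->verify(dbp, "broken.db", NULL, ofp,
+ *		    DB_SALVAGE | DB_AGGRESSIVE);
+ *	(void)fclose(ofp);
+ */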
+
+/*
+ * __db_vrfy_getpagezero --
+ *	Store the master metadata page into a local buffer.  For safety,
+ *	skip the regular DB paging code: read the page directly from disk
+ *	(via seek and read), or from the mpool for an in-memory database.
+ */
+static int
+__db_vrfy_getpagezero(dbp, fhp, name, mbuf, flags)
+ DB *dbp;
+ DB_FH *fhp;
+ const char *name;
+ u_int8_t *mbuf;
+ u_int32_t flags;
+{
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *h;
+ db_pgno_t pgno;
+ int ret, t_ret;
+ size_t nr;
+
+ env = dbp->env;
+
+ if (F_ISSET(dbp, DB_AM_INMEM)) {
+ /*
+ * Now get the metadata page from the cache, if possible. If
+ * we're verifying an in-memory db, this is the only metadata
+ * page we have.
+		 *
+ * Open the in-memory db file and get the metadata page.
+ */
+ if ((ret = __memp_fcreate_pp(env->dbenv, &mpf, DB_VERIFY)) != 0)
+ return (ret);
+ if ((ret = __memp_set_flags(mpf, DB_MPOOL_NOFILE, 1)) != 0)
+ goto mpf_err;
+ if ((ret = __memp_fopen_pp(mpf,
+ name, DB_ODDFILESIZE | DB_RDONLY, 0, 0)) != 0)
+ goto mpf_err;
+ pgno = PGNO_BASE_MD;
+ if ((ret = __memp_fget_pp(mpf, &pgno, NULL, 0, &h)) != 0) {
+ __db_err(env, ret, DB_STR_A("0747",
+ "Metadata page %lu cannot be read from mpool",
+ "%lu"), (u_long)pgno);
+ goto mpf_err;
+ }
+ memcpy(mbuf, (u_int8_t *)h, DBMETASIZE);
+ ret = __memp_fput_pp(mpf, h, DB_PRIORITY_UNCHANGED, 0);
+mpf_err: if ((t_ret = __memp_fclose_pp(mpf, 0)) != 0 || ret != 0) {
+ return (ret == 0 ? t_ret : ret);
+ }
+ } else {
+ /*
+ * Seek to the metadata page.
+ *
+ * Note that if we're just starting a verification, dbp->pgsize
+ * may be zero; this is okay, as we want page zero anyway and
+ * 0*0 == 0.
+ */
+ if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
+ (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) {
+ __db_err(env, ret, DB_STR_A("0520",
+ "Metadata page %lu cannot be read", "%lu"),
+ (u_long)PGNO_BASE_MD);
+ return (ret);
+ }
+
+ if (nr != DBMETASIZE) {
+ EPRINT((env, DB_STR_A("0521",
+ "Page %lu: Incomplete metadata page", "%lu"),
+ (u_long)PGNO_BASE_MD));
+ return (DB_VERIFY_FATAL);
+ }
+ }
+
+ return (ret);
+}
+
+/*
+ * __db_vrfy_pagezero --
+ * Verify the master metadata page. Use seek, read, and a local buffer
+ * rather than the DB paging code, for safety.
+ *
+ * Must correctly (or best-guess) set dbp->type and dbp->pagesize.
+ */
+static int
+__db_vrfy_pagezero(dbp, vdp, fhp, name, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ DB_FH *fhp;
+ const char *name;
+ u_int32_t flags;
+{
+ DBMETA *meta;
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ db_pgno_t freelist;
+ int isbad, ret, swapped;
+ u_int8_t mbuf[DBMETASIZE];
+
+ isbad = ret = swapped = 0;
+ freelist = 0;
+ env = dbp->env;
+ meta = (DBMETA *)mbuf;
+ dbp->type = DB_UNKNOWN;
+
+ if ((ret = __db_vrfy_getpagezero(dbp, fhp, name, mbuf, flags)) != 0)
+ return (ret);
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
+ return (ret);
+
+ if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
+ EPRINT((env, DB_STR_A("0522",
+ "Page %lu: metadata page corrupted", "%lu"),
+ (u_long)PGNO_BASE_MD));
+ isbad = 1;
+ if (ret != DB_CHKSUM_FAIL) {
+ EPRINT((env, DB_STR_A("0523",
+ "Page %lu: could not check metadata page", "%lu"),
+ (u_long)PGNO_BASE_MD));
+ return (DB_VERIFY_FATAL);
+ }
+ }
+
+ /*
+ * Check all of the fields that we can.
+ *
+ * 08-11: Current page number. Must == pgno.
+ * Note that endianness doesn't matter--it's zero.
+ */
+ if (meta->pgno != PGNO_BASE_MD) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0524",
+ "Page %lu: pgno incorrectly set to %lu", "%lu %lu"),
+ (u_long)PGNO_BASE_MD, (u_long)meta->pgno));
+ }
+
+ /* 12-15: Magic number. Must be one of valid set. */
+ if (__db_is_valid_magicno(meta->magic, &dbp->type))
+ swapped = 0;
+ else {
+ M_32_SWAP(meta->magic);
+ if (__db_is_valid_magicno(meta->magic,
+ &dbp->type))
+ swapped = 1;
+ else {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0525",
+ "Page %lu: bad magic number %lu", "%lu %lu"),
+ (u_long)PGNO_BASE_MD, (u_long)meta->magic));
+ }
+ }
+
+ /*
+ * 16-19: Version. Must be current; for now, we
+ * don't support verification of old versions.
+ */
+ if (swapped)
+ M_32_SWAP(meta->version);
+ if ((dbp->type == DB_BTREE &&
+ (meta->version > DB_BTREEVERSION ||
+ meta->version < DB_BTREEOLDVER)) ||
+ (dbp->type == DB_HASH &&
+ (meta->version > DB_HASHVERSION ||
+ meta->version < DB_HASHOLDVER)) ||
+ (dbp->type == DB_HEAP &&
+ (meta->version > DB_HEAPVERSION ||
+ meta->version < DB_HEAPOLDVER)) ||
+ (dbp->type == DB_QUEUE &&
+ (meta->version > DB_QAMVERSION ||
+ meta->version < DB_QAMOLDVER))) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0526",
+ "Page %lu: unsupported DB version %lu; extraneous errors may result",
+ "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->version));
+ }
+
+ /*
+ * 20-23: Pagesize. Must be power of two,
+ * greater than 512, and less than 64K.
+ */
+ if (swapped)
+ M_32_SWAP(meta->pagesize);
+ if (IS_VALID_PAGESIZE(meta->pagesize))
+ dbp->pgsize = meta->pagesize;
+ else {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0527", "Page %lu: bad page size %lu",
+ "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->pagesize));
+
+ /*
+ * Now try to settle on a pagesize to use.
+ * If the user-supplied one is reasonable,
+ * use it; else, guess.
+ */
+ if (!IS_VALID_PAGESIZE(dbp->pgsize))
+ dbp->pgsize = __db_guesspgsize(env, fhp);
+ }
+
+ /*
+ * 25: Page type. Must be correct for dbp->type,
+ * which is by now set as well as it can be.
+ */
+ /* Needs no swapping--only one byte! */
+ if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
+ (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
+ (dbp->type == DB_HEAP && meta->type != P_HEAPMETA) ||
+ (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0528", "Page %lu: bad page type %lu",
+ "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->type));
+ }
+
+ /*
+ * 26: Meta-flags.
+ */
+ if (meta->metaflags != 0) {
+ if (FLD_ISSET(meta->metaflags,
+ ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0529",
+ "Page %lu: bad meta-data flags value %#lx",
+ "%lu %#lx"), (u_long)PGNO_BASE_MD,
+ (u_long)meta->metaflags));
+ }
+ if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+ F_SET(pip, VRFY_HAS_CHKSUM);
+ if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
+ F_SET(pip, VRFY_HAS_PART_RANGE);
+ if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
+ F_SET(pip, VRFY_HAS_PART_CALLBACK);
+
+ if (FLD_ISSET(meta->metaflags,
+ DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
+ (ret = __partition_init(dbp, meta->metaflags)) != 0)
+ return (ret);
+ }
+
+ /*
+ * 28-31: Free list page number.
+ * 32-35: Last page in database file.
+ * We'll verify last_pgno once we open the db in the mpool;
+ * for now, just store it.
+ */
+ if (swapped)
+ M_32_SWAP(meta->free);
+ freelist = meta->free;
+ if (swapped)
+ M_32_SWAP(meta->last_pgno);
+ vdp->meta_last_pgno = meta->last_pgno;
+
+ /*
+ * Initialize vdp->pages to fit a single pageinfo structure for
+ * this one page. We'll realloc later when we know how many
+ * pages there are.
+ */
+ pip->pgno = PGNO_BASE_MD;
+ pip->type = meta->type;
+
+ /*
+ * Signal that we still have to check the info specific to
+ * a given type of meta page.
+ */
+ F_SET(pip, VRFY_INCOMPLETE);
+
+ pip->free = freelist;
+
+ if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
+ return (ret);
+
+ /* Set up the dbp's fileid. We don't use the regular open path. */
+ memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN);
+ dbp->preserve_fid = 1;
+
+ if (swapped == 1)
+ F_SET(dbp, DB_AM_SWAP);
+
+ return (isbad ? DB_VERIFY_BAD : 0);
+}
+
+/*
+ * __db_vrfy_walkpages --
+ * Main loop of the verifier/salvager. Walks through,
+ * page by page, and verifies all pages and/or prints all data pages.
+ */
+static int
+__db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *h;
+ VRFY_PAGEINFO *pip;
+ db_pgno_t i;
+ int ret, t_ret, isbad;
+
+ env = dbp->env;
+ mpf = dbp->mpf;
+ h = NULL;
+ ret = isbad = t_ret = 0;
+
+ for (i = 0; i <= vdp->last_pgno; i++) {
+ /*
+ * If DB_SALVAGE is set, we inspect our database of completed
+ * pages, and skip any we've already printed in the subdb pass.
+ */
+ if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0))
+ continue;
+
+ /*
+ * An individual page get can fail if:
+		 * * This is a hash database: it is expected to have empty
+		 *   buckets, which don't have allocated pages.  Create a
+		 *   dummy page so the verification can proceed.
+		 * * We are salvaging: flag the error and continue.
+ */
+ if ((t_ret = __memp_fget(mpf, &i,
+ vdp->thread_info, NULL, 0, &h)) != 0) {
+ if (dbp->type == DB_HASH ||
+ (dbp->type == DB_QUEUE &&
+ F_ISSET(dbp, DB_AM_INMEM))) {
+ if ((t_ret =
+ __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
+ goto err1;
+ pip->type = P_INVALID;
+ pip->pgno = i;
+ F_CLR(pip, VRFY_IS_ALLZEROES);
+ F_SET(pip, VRFY_NONEXISTENT);
+ if ((t_ret = __db_vrfy_putpageinfo(
+ env, vdp, pip)) != 0)
+ goto err1;
+ continue;
+ }
+ if (t_ret == DB_PAGE_NOTFOUND) {
+ EPRINT((env, DB_STR_A("0530",
+ "Page %lu: beyond the end of the file, metadata page has last page as %lu",
+ "%lu %lu"), (u_long)i,
+ (u_long)vdp->last_pgno));
+ if (ret == 0)
+ return (t_ret);
+ }
+
+err1: if (ret == 0)
+ ret = t_ret;
+ if (LF_ISSET(DB_SALVAGE))
+ continue;
+ return (ret);
+ }
+
+ if (LF_ISSET(DB_SALVAGE)) {
+ /*
+ * We pretty much don't want to quit unless a
+ * bomb hits. May as well return that something
+ * was screwy, however.
+ */
+ if ((t_ret = __db_salvage_pg(dbp,
+ vdp, i, h, handle, callback, flags)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ isbad = 1;
+ }
+ } else {
+ /*
+ * If we are not salvaging, and we get any error
+ * other than DB_VERIFY_BAD, return immediately;
+ * it may not be safe to proceed. If we get
+ * DB_VERIFY_BAD, keep going; listing more errors
+ * may make it easier to diagnose problems and
+ * determine the magnitude of the corruption.
+ *
+ * Verify info common to all page types.
+ */
+ if (i != PGNO_BASE_MD) {
+ ret = __db_vrfy_common(dbp, vdp, h, i, flags);
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else if (ret != 0)
+ goto err;
+ }
+
+ switch (TYPE(h)) {
+ case P_INVALID:
+ ret = __db_vrfy_invalid(dbp, vdp, h, i, flags);
+ break;
+ case __P_DUPLICATE:
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0531",
+ "Page %lu: old-style duplicate page",
+ "%lu"), (u_long)i));
+ break;
+ case P_HASH_UNSORTED:
+ case P_HASH:
+ ret = __ham_vrfy(dbp, vdp, h, i, flags);
+ break;
+ case P_HEAP:
+ case P_IHEAP:
+ ret = __heap_vrfy(dbp, vdp, h, i, flags);
+ break;
+ case P_IBTREE:
+ case P_IRECNO:
+ case P_LBTREE:
+ case P_LDUP:
+ ret = __bam_vrfy(dbp, vdp, h, i, flags);
+ break;
+ case P_LRECNO:
+ ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags);
+ break;
+ case P_OVERFLOW:
+ ret = __db_vrfy_overflow(dbp, vdp, h, i, flags);
+ break;
+ case P_HASHMETA:
+ ret = __ham_vrfy_meta(dbp,
+ vdp, (HMETA *)h, i, flags);
+ break;
+ case P_HEAPMETA:
+ ret = __heap_vrfy_meta(dbp,
+ vdp, (HEAPMETA *)h, i, flags);
+ break;
+ case P_BTREEMETA:
+ ret = __bam_vrfy_meta(dbp,
+ vdp, (BTMETA *)h, i, flags);
+ break;
+ case P_QAMMETA:
+ ret = __qam_vrfy_meta(dbp,
+ vdp, (QMETA *)h, i, flags);
+ break;
+ case P_QAMDATA:
+ ret = __qam_vrfy_data(dbp,
+ vdp, (QPAGE *)h, i, flags);
+ break;
+ default:
+ EPRINT((env, DB_STR_A("0532",
+ "Page %lu: unknown page type %lu",
+ "%lu %lu"), (u_long)i, (u_long)TYPE(h)));
+ isbad = 1;
+ break;
+ }
+
+ /*
+ * Set up error return.
+ */
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else if (ret != 0)
+ goto err;
+
+ /*
+ * Provide feedback to the application about our
+ * progress. The range 0-50% comes from the fact
+ * that this is the first of two passes through the
+ * database (front-to-back, then top-to-bottom).
+ */
+ if (dbp->db_feedback != NULL)
+ dbp->db_feedback(dbp, DB_VERIFY,
+ (int)((i + 1) * 50 / (vdp->last_pgno + 1)));
+ }
+
+ /*
+ * Just as with the page get, bail if and only if we're
+ * not salvaging.
+ */
+ if ((t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ if (!LF_ISSET(DB_SALVAGE))
+ return (ret);
+ }
+ }
+
+ /*
+ * If we've seen a Queue metadata page, we may need to walk Queue
+ * extent pages that won't show up between 0 and vdp->last_pgno.
+ */
+ if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
+ __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ if (t_ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else if (!LF_ISSET(DB_SALVAGE))
+ return (ret);
+ }
+
+ if (0) {
+err: if (h != NULL && (t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0)
+ return (ret == 0 ? t_ret : ret);
+ }
+
+ return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
+}
+
+/*
+ * __db_vrfy_structure --
+ * After a beginning-to-end walk through the database has been
+ * completed, put together the information that has been collected
+ * to verify the overall database structure.
+ *
+ * Should only be called if we want to do a database verification,
+ * i.e. if DB_SALVAGE is not set.
+ */
+static int
+__db_vrfy_structure(dbp, vdp, dbname, meta_pgno, lp, rp, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ const char *dbname;
+ db_pgno_t meta_pgno;
+ void *lp, *rp;
+ u_int32_t flags;
+{
+ DB *pgset;
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ db_pgno_t i;
+ int ret, isbad, hassubs, p;
+
+ isbad = 0;
+ pip = NULL;
+ env = dbp->env;
+ pgset = vdp->pgset;
+
+ /*
+ * Providing feedback here is tricky; in most situations,
+ * we fetch each page one more time, but we do so in a top-down
+ * order that depends on the access method. Worse, we do this
+ * recursively in btree, such that on any call where we're traversing
+ * a subtree we don't know where that subtree is in the whole database;
+ * worse still, any given database may be one of several subdbs.
+ *
+ * The solution is to decrement a counter vdp->pgs_remaining each time
+ * we verify (and call feedback on) a page. We may over- or
+ * under-count, but the structure feedback function will ensure that we
+ * never give a percentage under 50 or over 100. (The first pass
+ * covered the range 0-50%.)
+ */
+ if (dbp->db_feedback != NULL)
+ vdp->pgs_remaining = vdp->last_pgno + 1;
+
+ /*
+ * Call the appropriate function to downwards-traverse the db type.
+ */
+ switch (dbp->type) {
+ case DB_BTREE:
+ case DB_RECNO:
+ if ((ret =
+ __bam_vrfy_structure(dbp, vdp, 0, lp, rp, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+
+ /*
+ * If we have subdatabases and we know that the database is,
+ * thus far, sound, it's safe to walk the tree of subdatabases.
+ * Do so, and verify the structure of the databases within.
+ */
+ if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
+ goto err;
+ hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0;
+ if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
+ goto err;
+ pip = NULL;
+
+ if (isbad == 0 && hassubs)
+ if ((ret =
+ __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+ break;
+ case DB_HASH:
+ if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+ break;
+ case DB_HEAP:
+ if ((ret = __heap_vrfy_structure(dbp, vdp, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ }
+ /* Skip the freelist check for heap, it doesn't apply. */
+ goto err;
+ case DB_QUEUE:
+ if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ }
+
+ /*
+ * Queue pages may be unreferenced and totally zeroed, if
+ * they're empty; queue doesn't have much structure, so
+ * this is unlikely to be wrong in any troublesome sense.
+ * Skip to "err".
+ */
+ goto err;
+ case DB_UNKNOWN:
+ default:
+ ret = __db_unknown_path(env, "__db_vrfy_structure");
+ goto err;
+ }
+
+ /* Walk free list. */
+ if ((ret =
+ __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
+ isbad = 1;
+
+ /*
+ * If structure checks up until now have failed, it's likely that
+ * checking what pages have been missed will result in oodles of
+ * extraneous error messages being EPRINTed. Skip to the end
+ * if this is the case; we're going to be printing at least one
+ * error anyway, and probably all the more salient ones.
+ */
+ if (ret != 0 || isbad == 1)
+ goto err;
+
+ /*
+ * Make sure no page has been missed and that no page is still marked
+ * "all zeroes" unless we are looking at unused hash bucket pages or
+	 * pages off the end of the database.
+ */
+ for (i = 0; i < vdp->last_pgno + 1; i++) {
+ if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
+ goto err;
+ if ((ret = __db_vrfy_pgset_get(pgset,
+ vdp->thread_info, vdp->txn, i, &p)) != 0)
+ goto err;
+ if (pip->type == P_OVERFLOW) {
+ if ((u_int32_t)p != pip->refcount) {
+ EPRINT((env, DB_STR_A("0533",
+ "Page %lu: overflow refcount %lu, referenced %lu times",
+ "%lu %lu %lu"), (u_long)i,
+ (u_long)pip->refcount, (u_long)p));
+ isbad = 1;
+ }
+ } else if (p == 0 &&
+#ifndef HAVE_FTRUNCATE
+ !(i > vdp->meta_last_pgno &&
+ (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) &&
+#endif
+ !(dbp->type == DB_HASH &&
+ (pip->type == P_HASH || pip->type == P_INVALID))) {
+ /*
+ * It is OK for unreferenced hash buckets to be
+ * marked invalid and unreferenced.
+ */
+ EPRINT((env, DB_STR_A("0534",
+ "Page %lu: unreferenced page", "%lu"), (u_long)i));
+ isbad = 1;
+ }
+
+ if (F_ISSET(pip, VRFY_IS_ALLZEROES)
+#ifndef HAVE_FTRUNCATE
+ && i <= vdp->meta_last_pgno
+#endif
+ ) {
+ EPRINT((env, DB_STR_A("0535",
+ "Page %lu: totally zeroed page", "%lu"),
+ (u_long)i));
+ isbad = 1;
+ }
+ if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
+ goto err;
+ pip = NULL;
+ }
+
+err: if (pip != NULL)
+ (void)__db_vrfy_putpageinfo(env, vdp, pip);
+
+ return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
+}
+
+/*
+ * __db_is_valid_magicno --
+ *	Return 1 and set *typep if the magic number matches a known access
+ *	method; otherwise set *typep to DB_UNKNOWN and return 0.
+ */
+static int
+__db_is_valid_magicno(magic, typep)
+ u_int32_t magic;
+ DBTYPE *typep;
+{
+ switch (magic) {
+ case DB_BTREEMAGIC:
+ *typep = DB_BTREE;
+ return (1);
+ case DB_HASHMAGIC:
+ *typep = DB_HASH;
+ return (1);
+ case DB_HEAPMAGIC:
+ *typep = DB_HEAP;
+ return (1);
+ case DB_QAMMAGIC:
+ *typep = DB_QUEUE;
+ return (1);
+ default:
+ break;
+ }
+ *typep = DB_UNKNOWN;
+ return (0);
+}
+
+/*
+ * __db_vrfy_common --
+ * Verify info common to all page types.
+ *
+ * PUBLIC: int __db_vrfy_common
+ * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
+ */
+int
+__db_vrfy_common(dbp, vdp, h, pgno, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ PAGE *h;
+ db_pgno_t pgno;
+ u_int32_t flags;
+{
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ int ret, t_ret;
+ u_int8_t *p;
+
+ env = dbp->env;
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
+ return (ret);
+
+ pip->pgno = pgno;
+ F_CLR(pip, VRFY_IS_ALLZEROES);
+
+ /*
+ * Hash expands the table by leaving some pages between the
+ * old last and the new last totally zeroed. These pages may
+ * not be all zero if they were used, freed and then reallocated.
+ *
+ * Queue will create sparse files if sparse record numbers are used.
+ */
+ if (pgno != 0 && PGNO(h) == 0) {
+ F_SET(pip, VRFY_IS_ALLZEROES);
+ for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
+ if (*p != 0) {
+ F_CLR(pip, VRFY_IS_ALLZEROES);
+ break;
+ }
+ /*
+ * Mark it as a hash, and we'll
+ * check that that makes sense structurally later.
+ * (The queue verification doesn't care, since queues
+ * don't really have much in the way of structure.)
+ */
+ if (dbp->type != DB_HEAP)
+ pip->type = P_HASH;
+ ret = 0;
+ goto err; /* well, not really an err. */
+ }
+
+ if (PGNO(h) != pgno) {
+ EPRINT((env, DB_STR_A("0536", "Page %lu: bad page number %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)h->pgno));
+ ret = DB_VERIFY_BAD;
+ }
+
+ switch (h->type) {
+ case P_INVALID: /* Order matches ordinal value. */
+ case P_HASH_UNSORTED:
+ case P_IBTREE:
+ case P_IRECNO:
+ case P_LBTREE:
+ case P_LRECNO:
+ case P_OVERFLOW:
+ case P_HASHMETA:
+ case P_BTREEMETA:
+ case P_QAMMETA:
+ case P_QAMDATA:
+ case P_LDUP:
+ case P_HASH:
+ case P_HEAP:
+ case P_IHEAP:
+ case P_HEAPMETA:
+ break;
+ default:
+ EPRINT((env, DB_STR_A("0537", "Page %lu: bad page type %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)h->type));
+ ret = DB_VERIFY_BAD;
+ }
+ pip->type = h->type;
+
+err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __db_vrfy_invalid --
+ * Verify P_INVALID page.
+ * (Yes, there's not much to do here.)
+ */
+static int
+__db_vrfy_invalid(dbp, vdp, h, pgno, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ PAGE *h;
+ db_pgno_t pgno;
+ u_int32_t flags;
+{
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ int ret, t_ret;
+
+ env = dbp->env;
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
+ return (ret);
+ pip->next_pgno = pip->prev_pgno = 0;
+
+ if (!IS_VALID_PGNO(NEXT_PGNO(h))) {
+ EPRINT((env, DB_STR_A("0538", "Page %lu: invalid next_pgno %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)NEXT_PGNO(h)));
+ ret = DB_VERIFY_BAD;
+ } else
+ pip->next_pgno = NEXT_PGNO(h);
+
+ if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __db_vrfy_datapage --
+ * Verify elements common to data pages (P_HASH, P_LBTREE,
+ * P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
+ * those defined in the PAGE structure.
+ *
+ * Called from each of the per-page routines, after the
+ * all-page-type-common elements of pip have been verified and filled
+ * in.
+ *
+ * PUBLIC: int __db_vrfy_datapage
+ * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
+ */
+int
+__db_vrfy_datapage(dbp, vdp, h, pgno, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ PAGE *h;
+ db_pgno_t pgno;
+ u_int32_t flags;
+{
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ u_int32_t smallest_entry;
+ int isbad, ret, t_ret;
+
+ env = dbp->env;
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
+ return (ret);
+ isbad = 0;
+
+ /*
+ * prev_pgno and next_pgno: store for inter-page checks,
+ * verify that they point to actual pages and not to self.
+ *
+ * !!!
+ * Internal btree pages, as well as heap pages, do not maintain these
+ * fields (indeed, they overload them). Skip.
+ */
+ if (TYPE(h) != P_IBTREE &&
+ TYPE(h) != P_IRECNO && TYPE(h) != P_HEAP && TYPE(h) != P_IHEAP) {
+ if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0539",
+ "Page %lu: invalid prev_pgno %lu", "%lu %lu"),
+ (u_long)pip->pgno, (u_long)PREV_PGNO(h)));
+ }
+ if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0540",
+ "Page %lu: invalid next_pgno %lu", "%lu %lu"),
+ (u_long)pip->pgno, (u_long)NEXT_PGNO(h)));
+ }
+ pip->prev_pgno = PREV_PGNO(h);
+ pip->next_pgno = NEXT_PGNO(h);
+ }
+
+ /*
+ * Verify the number of entries on the page: there's no good way to
+ * determine if this is accurate. The best we can do is verify that
+ * it's not more than can, in theory, fit on the page. Then, we make
+ * sure there are at least this many valid elements in inp[], and
+ * hope the test catches most cases.
+ */
+ switch (TYPE(h)) {
+ case P_HASH_UNSORTED:
+ case P_HASH:
+ smallest_entry = HKEYDATA_PSIZE(0);
+ break;
+ case P_HEAP:
+ smallest_entry = sizeof(HEAPHDR) + sizeof(db_indx_t);
+ break;
+ case P_IHEAP:
+		/* For P_IHEAP, the prev_pgno field really holds high_pgno. */
+ pip->prev_pgno = PREV_PGNO(h);
+ smallest_entry = 0;
+ break;
+ case P_IBTREE:
+ smallest_entry = BINTERNAL_PSIZE(0);
+ break;
+ case P_IRECNO:
+ smallest_entry = RINTERNAL_PSIZE;
+ break;
+ case P_LBTREE:
+ case P_LDUP:
+ case P_LRECNO:
+ smallest_entry = BKEYDATA_PSIZE(0);
+ break;
+ default:
+ smallest_entry = 0;
+ break;
+ }
+ if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0541",
+ "Page %lu: too many entries: %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)NUM_ENT(h)));
+ }
+
+ if (TYPE(h) != P_OVERFLOW)
+ pip->entries = NUM_ENT(h);
+
+ /*
+ * btree level. Should be zero unless we're a btree;
+ * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
+ * and we need to save it off.
+ */
+ switch (TYPE(h)) {
+ case P_IBTREE:
+ case P_IRECNO:
+ if (LEVEL(h) < LEAFLEVEL + 1) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0542",
+ "Page %lu: bad btree level %lu", "%lu %lu"),
+ (u_long)pgno, (u_long)LEVEL(h)));
+ }
+ pip->bt_level = LEVEL(h);
+ break;
+ case P_LBTREE:
+ case P_LDUP:
+ case P_LRECNO:
+ if (LEVEL(h) != LEAFLEVEL) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0543",
+ "Page %lu: btree leaf page has incorrect level %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
+ }
+ break;
+ default:
+ if (LEVEL(h) != 0) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0544",
+ "Page %lu: nonzero level %lu in non-btree database",
+ "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
+ }
+ break;
+ }
+
+ /*
+ * Even though inp[] occurs in all PAGEs, we look at it in the
+ * access-method-specific code, since btree and hash treat
+ * item lengths very differently, and one of the most important
+ * things we want to verify is that the data--as specified
+ * by offset and length--cover the right part of the page
+ * without overlaps, gaps, or violations of the page boundary.
+ */
+ if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
+}
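+
+/*
+ * A worked instance of the "too many entries" heuristic above, using
+ * hypothetical numbers (the real minimum item sizes come from the
+ * *_PSIZE macros): if the smallest possible entry is 12 bytes and the
+ * page size is 4096, then smallest_entry * NUM_ENT / 2 first exceeds
+ * 4096 at NUM_ENT == 683, and the page is flagged.
+ */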
+
+/*
+ * __db_vrfy_meta --
+ * Verify the access-method common parts of a meta page, using
+ * normal mpool routines.
+ *
+ * PUBLIC: int __db_vrfy_meta
+ * PUBLIC: __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
+ */
+int
+__db_vrfy_meta(dbp, vdp, meta, pgno, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ DBMETA *meta;
+ db_pgno_t pgno;
+ u_int32_t flags;
+{
+ DBTYPE dbtype, magtype;
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ int isbad, ret, t_ret;
+
+ isbad = 0;
+ env = dbp->env;
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
+ return (ret);
+
+ /* type plausible for a meta page */
+ switch (meta->type) {
+ case P_BTREEMETA:
+ dbtype = DB_BTREE;
+ break;
+ case P_HASHMETA:
+ dbtype = DB_HASH;
+ break;
+ case P_HEAPMETA:
+ dbtype = DB_HEAP;
+ break;
+ case P_QAMMETA:
+ dbtype = DB_QUEUE;
+ break;
+ default:
+ ret = __db_unknown_path(env, "__db_vrfy_meta");
+ goto err;
+ }
+
+ /* magic number valid */
+ if (!__db_is_valid_magicno(meta->magic, &magtype)) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0545", "Page %lu: invalid magic number",
+ "%lu"), (u_long)pgno));
+ }
+ if (magtype != dbtype) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0546",
+ "Page %lu: magic number does not match database type",
+ "%lu"), (u_long)pgno));
+ }
+
+ /* version */
+ if ((dbtype == DB_BTREE &&
+ (meta->version > DB_BTREEVERSION ||
+ meta->version < DB_BTREEOLDVER)) ||
+ (dbtype == DB_HASH &&
+ (meta->version > DB_HASHVERSION ||
+ meta->version < DB_HASHOLDVER)) ||
+ (dbtype == DB_HEAP &&
+ (meta->version > DB_HEAPVERSION ||
+ meta->version < DB_HEAPOLDVER)) ||
+ (dbtype == DB_QUEUE &&
+ (meta->version > DB_QAMVERSION ||
+ meta->version < DB_QAMOLDVER))) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0547",
+ "Page %lu: unsupported database version %lu; extraneous errors may result",
+ "%lu %lu"), (u_long)pgno, (u_long)meta->version));
+ }
+
+ /* pagesize */
+ if (meta->pagesize != dbp->pgsize) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0548", "Page %lu: invalid pagesize %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)meta->pagesize));
+ }
+
+ /* Flags */
+ if (meta->metaflags != 0) {
+ if (FLD_ISSET(meta->metaflags,
+ ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0549",
+ "Page %lu: bad meta-data flags value %#lx",
+ "%lu %#lx"), (u_long)PGNO_BASE_MD,
+ (u_long)meta->metaflags));
+ }
+ if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+ F_SET(pip, VRFY_HAS_CHKSUM);
+ if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
+ F_SET(pip, VRFY_HAS_PART_RANGE);
+ if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
+ F_SET(pip, VRFY_HAS_PART_CALLBACK);
+ }
+
+ /*
+ * Free list.
+ *
+ * If this is not the main, master-database meta page, it
+ * should not have a free list.
+ */
+ if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0550",
+ "Page %lu: nonempty free list on subdatabase metadata page",
+ "%lu"), (u_long)pgno));
+ }
+
+ /* Can correctly be PGNO_INVALID--that's just the end of the list. */
+ if (IS_VALID_PGNO(meta->free))
+ pip->free = meta->free;
+ else {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0551",
+ "Page %lu: nonsensical free list pgno %lu", "%lu %lu"),
+ (u_long)pgno, (u_long)meta->free));
+ }
+
+ /*
+ * Check that the meta page agrees with what we got from mpool.
+ * If we don't have FTRUNCATE then mpool could include some
+	 * zeroed pages at the end of the file, so we assume the meta page
+ * is correct. Queue does not update the meta page's last_pgno.
+ */
+ if (pgno == PGNO_BASE_MD &&
+ dbtype != DB_QUEUE && meta->last_pgno != vdp->last_pgno) {
+#ifdef HAVE_FTRUNCATE
+ isbad = 1;
+ EPRINT((env, DB_STR_A("0552",
+ "Page %lu: last_pgno is not correct: %lu != %lu",
+ "%lu %lu %lu"), (u_long)pgno,
+ (u_long)meta->last_pgno, (u_long)vdp->last_pgno));
+#endif
+ vdp->meta_last_pgno = meta->last_pgno;
+ }
+
+ /*
+ * We have now verified the common fields of the metadata page.
+ * Clear the flag that told us they had been incompletely checked.
+ */
+ F_CLR(pip, VRFY_INCOMPLETE);
+
+err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
+}
+
+/*
+ * __db_vrfy_freelist --
+ * Walk free list, checking off pages and verifying absence of
+ * loops.
+ */
+static int
+__db_vrfy_freelist(dbp, vdp, meta, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t meta;
+ u_int32_t flags;
+{
+ DB *pgset;
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ db_pgno_t cur_pgno, next_pgno;
+ int p, ret, t_ret;
+
+ env = dbp->env;
+ pgset = vdp->pgset;
+ DB_ASSERT(env, pgset != NULL);
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
+ return (ret);
+ for (next_pgno = pip->free;
+ next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) {
+ cur_pgno = pip->pgno;
+ if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
+ return (t_ret);
+
+ /* This shouldn't happen, but just in case. */
+ if (!IS_VALID_PGNO(next_pgno)) {
+ EPRINT((env, DB_STR_A("0553",
+ "Page %lu: invalid next_pgno %lu on free list page",
+ "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
+ return (DB_VERIFY_BAD);
+ }
+
+ if (next_pgno > vdp->last_pgno) {
+ EPRINT((env, DB_STR_A("0713",
+ "Page %lu: page %lu on free list beyond last_pgno %lu",
+ "%lu %lu %lu"), (u_long)cur_pgno,
+ (u_long)next_pgno, (u_long)vdp->last_pgno));
+ ret = DB_VERIFY_BAD;
+ }
+ /* Detect cycles. */
+ if ((t_ret = __db_vrfy_pgset_get(pgset,
+ vdp->thread_info, vdp->txn, next_pgno, &p)) != 0)
+ return (t_ret);
+ if (p != 0) {
+ EPRINT((env, DB_STR_A("0554",
+ "Page %lu: page %lu encountered a second time on free list",
+ "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
+ return (DB_VERIFY_BAD);
+ }
+ if ((t_ret = __db_vrfy_pgset_inc(pgset,
+ vdp->thread_info, vdp->txn, next_pgno)) != 0)
+ return (t_ret);
+
+ if ((t_ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
+ return (t_ret);
+
+ if (pip->type != P_INVALID) {
+ EPRINT((env, DB_STR_A("0555",
+ "Page %lu: non-invalid page %lu on free list",
+ "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
+ ret = DB_VERIFY_BAD; /* unsafe to continue */
+ break;
+ }
+ }
+
+ if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
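+
+/*
+ * For instance (hypothetical page numbers), a corrupted free list such
+ * as 3 -> 7 -> 3 is caught above: the second visit to page 3 finds its
+ * pgset count already nonzero and the walk returns DB_VERIFY_BAD.
+ */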
+
+/*
+ * __db_vrfy_subdbs --
+ * Walk the known-safe master database of subdbs with a cursor,
+ * verifying the structure of each subdatabase we encounter.
+ */
+static int
+__db_vrfy_subdbs(dbp, vdp, dbname, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ const char *dbname;
+ u_int32_t flags;
+{
+ DB *mdbp;
+ DBC *dbc;
+ DBT key, data;
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ db_pgno_t meta_pgno;
+ int ret, t_ret, isbad;
+ u_int8_t type;
+
+ isbad = 0;
+ dbc = NULL;
+ env = dbp->env;
+
+ if ((ret = __db_master_open(dbp,
+ vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0)
+ return (ret);
+
+ if ((ret = __db_cursor_int(mdbp, NULL,
+ vdp->txn, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
+ goto err;
+
+ memset(&key, 0, sizeof(key));
+ memset(&data, 0, sizeof(data));
+ while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) {
+ if (data.size != sizeof(db_pgno_t)) {
+ EPRINT((env, DB_STR("0556",
+ "Subdatabase entry not page-number size")));
+ isbad = 1;
+ goto err;
+ }
+ memcpy(&meta_pgno, data.data, data.size);
+ /*
+ * Subdatabase meta pgnos are stored in network byte
+ * order for cross-endian compatibility. Swap if appropriate.
+ */
+ DB_NTOHL_SWAP(env, &meta_pgno);
+ if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) {
+ EPRINT((env, DB_STR_A("0557",
+ "Subdatabase entry references invalid page %lu",
+ "%lu"), (u_long)meta_pgno));
+ isbad = 1;
+ goto err;
+ }
+ if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
+ goto err;
+ type = pip->type;
+ if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
+ goto err;
+ switch (type) {
+ case P_BTREEMETA:
+ if ((ret = __bam_vrfy_structure(
+ dbp, vdp, meta_pgno, NULL, NULL, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+ break;
+ case P_HASHMETA:
+ if ((ret = __ham_vrfy_structure(
+ dbp, vdp, meta_pgno, flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto err;
+ }
+ break;
+ case P_QAMMETA:
+ default:
+ EPRINT((env, DB_STR_A("0558",
+ "Subdatabase entry references page %lu of invalid type %lu",
+ "%lu %lu"), (u_long)meta_pgno, (u_long)type));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+ }
+
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+
+err: if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
+}
+
+/*
+ * __db_vrfy_struct_feedback --
+ * Provide feedback during top-down database structure traversal.
+ * (See comment at the beginning of __db_vrfy_structure.)
+ *
+ * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *));
+ */
+void
+__db_vrfy_struct_feedback(dbp, vdp)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+{
+ int progress;
+
+ if (dbp->db_feedback == NULL)
+ return;
+
+ if (vdp->pgs_remaining > 0)
+ vdp->pgs_remaining--;
+
+ /* Don't allow a feedback call of 100 until we're really done. */
+ progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1));
+ dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 99 : progress);
+}
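+
+/*
+ * For example (hypothetical numbers): with last_pgno == 99 and 60
+ * pages still remaining, the call above reports
+ * 100 - (60 * 50 / 100) = 70 percent, staying within the 50-99 range
+ * reserved for this second, structure-checking pass.
+ */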
+
+/*
+ * __db_vrfy_orderchkonly --
+ *	Do a sort-order/hashing check on a known-otherwise-good subdb.
+ */
+static int
+__db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ const char *name, *subdb;
+ u_int32_t flags;
+{
+ BTMETA *btmeta;
+ DB *mdbp, *pgset;
+ DBC *pgsc;
+ DBT key, data;
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ HASH *h_internal;
+ HMETA *hmeta;
+ PAGE *h, *currpg;
+ db_pgno_t meta_pgno, p, pgno;
+ u_int32_t bucket;
+ int t_ret, ret;
+
+ pgset = NULL;
+ pgsc = NULL;
+ env = dbp->env;
+ mpf = dbp->mpf;
+ currpg = h = NULL;
+
+ LF_CLR(DB_NOORDERCHK);
+
+ /* Open the master database and get the meta_pgno for the subdb. */
+ if ((ret = __db_master_open(dbp,
+ vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0)
+ goto err;
+
+ DB_INIT_DBT(key, subdb, strlen(subdb));
+ memset(&data, 0, sizeof(data));
+ if ((ret = __db_get(mdbp,
+ vdp->thread_info, NULL, &key, &data, 0)) != 0) {
+ if (ret == DB_NOTFOUND)
+ ret = ENOENT;
+ goto err;
+ }
+
+ if (data.size != sizeof(db_pgno_t)) {
+ EPRINT((env, DB_STR("0559",
+ "Subdatabase entry of invalid size")));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+
+ memcpy(&meta_pgno, data.data, data.size);
+
+ /*
+ * Subdatabase meta pgnos are stored in network byte
+ * order for cross-endian compatibility. Swap if appropriate.
+ */
+ DB_NTOHL_SWAP(env, &meta_pgno);
+
+ if ((ret = __memp_fget(mpf,
+ &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0)
+ goto err;
+
+ if ((ret = __db_vrfy_pgset(env,
+ vdp->thread_info, dbp->pgsize, &pgset)) != 0)
+ goto err;
+
+ switch (TYPE(h)) {
+ case P_BTREEMETA:
+ btmeta = (BTMETA *)h;
+ if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) {
+ /* Recnos have no order to check. */
+ ret = 0;
+ goto err;
+ }
+ if ((ret =
+ __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0)
+ goto err;
+ if ((ret = __db_cursor_int(pgset, NULL, vdp->txn, dbp->type,
+ PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
+ goto err;
+ while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
+ if ((ret = __memp_fget(mpf, &p,
+ vdp->thread_info, NULL, 0, &currpg)) != 0)
+ goto err;
+ if ((ret = __bam_vrfy_itemorder(dbp, NULL,
+ vdp->thread_info, currpg, p, NUM_ENT(currpg), 1,
+ F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0)
+ goto err;
+ if ((ret = __memp_fput(mpf,
+ vdp->thread_info, currpg, dbp->priority)) != 0)
+ goto err;
+ currpg = NULL;
+ }
+
+ /*
+ * The normal exit condition for the loop above is DB_NOTFOUND.
+ * If we see that, zero it and continue on to cleanup.
+ * Otherwise, it's a real error and will be returned.
+ */
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ break;
+ case P_HASHMETA:
+ hmeta = (HMETA *)h;
+ h_internal = (HASH *)dbp->h_internal;
+ /*
+ * Make sure h_charkey is right.
+ */
+ if (h_internal == NULL) {
+ EPRINT((env, DB_STR_A("0560",
+ "Page %lu: DB->h_internal field is NULL", "%lu"),
+ (u_long)meta_pgno));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+ if (h_internal->h_hash == NULL)
+ h_internal->h_hash = hmeta->dbmeta.version < 5
+ ? __ham_func4 : __ham_func5;
+ if (hmeta->h_charkey !=
+ h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) {
+ EPRINT((env, DB_STR_A("0561",
+ "Page %lu: incorrect hash function for database",
+ "%lu"), (u_long)meta_pgno));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+
+ /*
+		 * For each bucket, verify hashing on each page in the
+ * corresponding chain of pages.
+ */
+ if ((ret = __db_cursor_int(dbp, NULL, vdp->txn, dbp->type,
+ PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
+ goto err;
+ for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
+ pgno = BS_TO_PAGE(bucket, hmeta->spares);
+ while (pgno != PGNO_INVALID) {
+ if ((ret = __memp_fget(mpf, &pgno,
+ vdp->thread_info, NULL, 0, &currpg)) != 0)
+ goto err;
+ if ((ret = __ham_vrfy_hashing(pgsc,
+ NUM_ENT(currpg), hmeta, bucket, pgno,
+ flags, h_internal->h_hash)) != 0)
+ goto err;
+ pgno = NEXT_PGNO(currpg);
+ if ((ret = __memp_fput(mpf, vdp->thread_info,
+ currpg, dbp->priority)) != 0)
+ goto err;
+ currpg = NULL;
+ }
+ }
+ break;
+ default:
+ EPRINT((env, DB_STR_A("0562",
+ "Page %lu: database metapage of bad type %lu",
+ "%lu %lu"), (u_long)meta_pgno, (u_long)TYPE(h)));
+ ret = DB_VERIFY_BAD;
+ break;
+ }
+
+err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
+ ret = t_ret;
+ if (pgset != NULL &&
+ (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (h != NULL && (t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0)
+ ret = t_ret;
+ if (currpg != NULL &&
+ (t_ret = __memp_fput(mpf,
+ vdp->thread_info, currpg, dbp->priority)) != 0)
+ ret = t_ret;
+ if ((t_ret = __db_close(mdbp, NULL, 0)) != 0)
+ ret = t_ret;
+ return (ret);
+}
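+
+/*
+ * An order-check-only sketch (illustrative; "multi.db", "subdb1" and
+ * my_compare are assumptions): the caller configures the comparison
+ * function the subdatabase was created with, then asks for just the
+ * sort-order check on that subdatabase.
+ *
+ *	DB *dbp;
+ *
+ *	if (db_create(&dbp, NULL, 0) != 0)
+ *		return;
+ *	(void)dbp->set_bt_compare(dbp, my_compare);
+ *	(void)dbp->verify(dbp, "multi.db", "subdb1", NULL, DB_ORDERCHKONLY);
+ */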
+
+/*
+ * __db_salvage_pg --
+ * Walk through a page, salvaging all likely or plausible (w/
+ * DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp.
+ *
+ * PUBLIC: int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t,
+ * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t));
+ */
+int
+__db_salvage_pg(dbp, vdp, pgno, h, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t pgno;
+ PAGE *h;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ int keyflag, ret, t_ret;
+
+ env = dbp->env;
+ DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
+
+ /*
+ * !!!
+ * We dump record numbers when salvaging Queue databases, but not for
+ * immutable Recno databases. The problem is we can't figure out the
+ * record number from the database page in the Recno case, while the
+ * offset in the file is sufficient for Queue.
+ */
+ keyflag = 0;
+
+ /* If we got this page in the subdb pass, we can safely skip it. */
+ if (__db_salvage_isdone(vdp, pgno))
+ return (0);
+
+ switch (TYPE(h)) {
+ case P_BTREEMETA:
+ ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags);
+ break;
+ case P_HASH:
+ case P_HASH_UNSORTED:
+ case P_HEAP:
+ case P_LBTREE:
+ case P_QAMDATA:
+ return (__db_salvage_leaf(dbp,
+ vdp, pgno, h, handle, callback, flags));
+ case P_HASHMETA:
+ ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags);
+ break;
+ case P_HEAPMETA:
+ ret = __heap_vrfy_meta(dbp, vdp, (HEAPMETA *)h, pgno, flags);
+ break;
+ case P_IBTREE:
+ /*
+ * We need to mark any overflow keys on internal pages as seen,
+ * so we don't print them out in __db_salvage_unknowns. But if
+ * we're an upgraded database, a P_LBTREE page may very well
+ * have a reference to the same overflow pages (this practice
+ * stopped somewhere around db4.5). To give P_LBTREEs a chance
+ * to print out any keys on shared pages, mark the page now and
+ * deal with it at the end.
+ */
+ return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE));
+ case P_IHEAP:
+ /*
+ * There's nothing to salvage from heap region pages. Just mark
+ * that we've seen the page.
+ */
+ return (__db_salvage_markdone(vdp, pgno));
+ case P_LDUP:
+ return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP));
+ case P_LRECNO:
+ /*
+ * Recno leaves are tough, because the leaf could be (1) a dup
+ * page, or it could be (2) a regular database leaf page.
+ * Fortunately, RECNO databases are not allowed to have
+ * duplicates.
+ *
+ * If there are no subdatabases, dump the page immediately if
+ * it's a leaf in a RECNO database, otherwise wait and hopefully
+ * it will be dumped by the leaf page that refers to it,
+ * otherwise we'll get it with the unknowns.
+ *
+ * If there are subdatabases, there might be mixed types and
+ * dbp->type can't be trusted. We'll only get here after
+ * salvaging each database, though, so salvaging this page
+ * immediately isn't important. If this page is a dup, it might
+ * get salvaged later on, otherwise the unknowns pass will pick
+ * it up. Note that SALVAGE_HASSUBDBS won't get set if we're
+ * salvaging aggressively.
+ *
+ * If we're salvaging aggressively, we don't know whether or not
+ * there's subdatabases, so we wait on all recno pages.
+ */
+ if (!LF_ISSET(DB_AGGRESSIVE) &&
+ !F_ISSET(vdp, SALVAGE_HASSUBDBS) && dbp->type == DB_RECNO)
+ return (__db_salvage_leaf(dbp,
+ vdp, pgno, h, handle, callback, flags));
+ return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNODUP));
+ case P_OVERFLOW:
+ return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW));
+ case P_QAMMETA:
+ keyflag = 1;
+ ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags);
+ break;
+ case P_INVALID:
+ case P_IRECNO:
+ case __P_DUPLICATE:
+ default:
+ /*
+ * There's no need to display an error, the page type was
+ * already checked and reported on.
+ */
+ return (0);
+ }
+ if (ret != 0)
+ return (ret);
+
+ /*
+	 * We have to display the dump header if it's a metadata page.  It's
+	 * our last chance, as the page was marked "seen" in the vrfy routine
+	 * and we won't see it again.  We don't display headers for
+	 * the first database in a multi-database file; that database simply
+	 * contains a list of subdatabases.
+ */
+ if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
+ return (ret);
+ if (!F_ISSET(pip, VRFY_HAS_SUBDBS) && !LF_ISSET(DB_VERIFY_PARTITION))
+ ret = __db_prheader(
+ dbp, NULL, 0, keyflag, handle, callback, vdp, pgno);
+ if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __db_salvage_leaf --
+ * Walk through a leaf, salvaging all likely key/data pairs and marking
+ * seen pages in vdp.
+ *
+ * PUBLIC: int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t,
+ * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t));
+ */
+int
+__db_salvage_leaf(dbp, vdp, pgno, h, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t pgno;
+ PAGE *h;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ ENV *env;
+
+ env = dbp->env;
+ DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
+
+ /* If we got this page in the subdb pass, we can safely skip it. */
+ if (__db_salvage_isdone(vdp, pgno))
+ return (0);
+
+ switch (TYPE(h)) {
+ case P_HASH_UNSORTED:
+ case P_HASH:
+ return (__ham_salvage(dbp, vdp,
+ pgno, h, handle, callback, flags));
+ case P_HEAP:
+ return (__heap_salvage(dbp, vdp,
+ pgno, h, handle, callback, flags));
+ case P_LBTREE:
+ case P_LRECNO:
+ return (__bam_salvage(dbp, vdp,
+ pgno, TYPE(h), h, handle, callback, NULL, flags));
+ case P_QAMDATA:
+ return (__qam_salvage(dbp, vdp,
+ pgno, h, handle, callback, flags));
+ default:
+ /*
+		 * There's no need to display an error; the page type was
+		 * already checked and reported on.
+ */
+ return (0);
+ }
+}
+
+/*
+ * __db_salvage_unknowns --
+ * Walk through the salvager database, printing with key "UNKNOWN"
+ * any pages we haven't dealt with.
+ */
+static int
+__db_salvage_unknowns(dbp, vdp, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ DBC *dbc;
+ DBT unkdbt, key, *dbt;
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *h;
+ db_pgno_t pgno;
+ u_int32_t pgtype, ovfl_bufsz, tmp_flags;
+ int ret, t_ret;
+ void *ovflbuf;
+
+ dbc = NULL;
+ env = dbp->env;
+ mpf = dbp->mpf;
+
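+	/*
+	 * Key/data pairs whose key can't be recovered are dumped under the
+	 * literal key "UNKNOWN".
+	 */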
+ DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
+
+ if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
+ return (ret);
+ ovfl_bufsz = dbp->pgsize;
+
+ /*
+ * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW
+ * pages, because they may be referenced by the standard database
+ * pages that we're resolving.
+ */
+ while ((t_ret =
+ __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) {
+ if ((t_ret = __memp_fget(mpf,
+ &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ continue;
+ }
+
+ dbt = NULL;
+ tmp_flags = 0;
+ switch (pgtype) {
+ case SALVAGE_LDUP:
+ case SALVAGE_LRECNODUP:
+ dbt = &unkdbt;
+ tmp_flags = DB_SA_UNKNOWNKEY;
+ /* FALLTHROUGH */
+ case SALVAGE_IBTREE:
+ case SALVAGE_LBTREE:
+ case SALVAGE_LRECNO:
+ if ((t_ret = __bam_salvage(
+ dbp, vdp, pgno, pgtype, h, handle,
+ callback, dbt, tmp_flags | flags)) != 0 && ret == 0)
+ ret = t_ret;
+ break;
+ case SALVAGE_OVERFLOW:
+ DB_ASSERT(env, 0); /* Shouldn't ever happen. */
+ break;
+ case SALVAGE_HASH:
+ if ((t_ret = __ham_salvage(dbp, vdp,
+ pgno, h, handle, callback, flags)) != 0 && ret == 0)
+ ret = t_ret;
+ break;
+ case SALVAGE_INVALID:
+ case SALVAGE_IGNORE:
+ default:
+ /*
+			 * Shouldn't happen; assert in diagnostic builds and
+			 * otherwise just skip the page.
+ */
+ DB_ASSERT(env, 0);
+ break;
+ }
+ if ((t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
+ }
+
+ /* We should have reached the end of the database. */
+ if (t_ret == DB_NOTFOUND)
+ t_ret = 0;
+ if (t_ret != 0 && ret == 0)
+ ret = t_ret;
+
+	/* Close the cursor so the second pass starts a fresh traversal. */
+ if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ dbc = NULL;
+
+ /* Now, deal with any remaining overflow pages. */
+ while ((t_ret =
+ __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) {
+ if ((t_ret = __memp_fget(mpf,
+ &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ continue;
+ }
+
+ switch (pgtype) {
+ case SALVAGE_OVERFLOW:
+ /*
+ * XXX:
+ * This may generate multiple "UNKNOWN" keys in
+ * a database with no dups. What to do?
+ */
+ if ((t_ret = __db_safe_goff(dbp, vdp,
+ pgno, &key, &ovflbuf, &ovfl_bufsz, flags)) != 0 ||
+ ((vdp->type == DB_BTREE || vdp->type == DB_HASH) &&
+ (t_ret = __db_vrfy_prdbt(&unkdbt,
+ 0, " ", handle, callback, 0, 0, vdp)) != 0) ||
+ (t_ret = __db_vrfy_prdbt(
+ &key, 0, " ", handle, callback, 0, 0, vdp)) != 0)
+ if (ret == 0)
+ ret = t_ret;
+ break;
+ default:
+ DB_ASSERT(env, 0); /* Shouldn't ever happen. */
+ break;
+ }
+ if ((t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
+ }
+
+ /* We should have reached the end of the database. */
+ if (t_ret == DB_NOTFOUND)
+ t_ret = 0;
+ if (t_ret != 0 && ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ __os_free(env, ovflbuf);
+
+ return (ret);
+}
+
+/*
+ * Offset of the ith inp array entry, which we can compare to the offset
+ * the entry stores.
+ */
+#define INP_OFFSET(dbp, h, i) \
+ ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h)))
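+
+/*
+ * For example (assuming the common 26-byte page header and 2-byte
+ * db_indx_t slots, neither of which is guaranteed here): entry 3's slot
+ * lives at page offset 26 + 3 * 2 == 32, so any item offset stored in
+ * inp[3] must be greater than 32 and less than the page size.
+ */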
+
+/*
+ * __db_vrfy_inpitem --
+ * Verify that a single entry in the inp array is sane, and update
+ * the high water mark and current item offset. (The former of these is
+ * used for state information between calls, and is required; it must
+ * be initialized to the pagesize before the first call.)
+ *
+ * Returns DB_VERIFY_FATAL if inp has collided with the data,
+ * since verification can't continue from there; returns DB_VERIFY_BAD
+ * if anything else is wrong.
+ *
+ * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *,
+ * PUBLIC: db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *));
+ */
+int
+__db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
+ DB *dbp;
+ PAGE *h;
+ db_pgno_t pgno;
+ u_int32_t i;
+ int is_btree;
+ u_int32_t flags, *himarkp, *offsetp;
+{
+ BKEYDATA *bk;
+ ENV *env;
+ db_indx_t *inp, offset, len;
+
+ env = dbp->env;
+
+ DB_ASSERT(env, himarkp != NULL);
+ inp = P_INP(dbp, h);
+
+ /*
+ * Check that the inp array, which grows from the beginning of the
+	 * page forward, has not collided with the data, which grows from
+	 * the end of the page backward.
+ */
+ if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) {
+ /* We've collided with the data. We need to bail. */
+ EPRINT((env, DB_STR_A("0563",
+ "Page %lu: entries listing %lu overlaps data",
+ "%lu %lu"), (u_long)pgno, (u_long)i));
+ return (DB_VERIFY_FATAL);
+ }
+
+ offset = inp[i];
+
+ /*
+ * Check that the item offset is reasonable: it points somewhere
+ * after the inp array and before the end of the page.
+ */
+ if (offset <= INP_OFFSET(dbp, h, i) || offset >= dbp->pgsize) {
+ EPRINT((env, DB_STR_A("0564",
+ "Page %lu: bad offset %lu at page index %lu",
+ "%lu %lu %lu"), (u_long)pgno, (u_long)offset, (u_long)i));
+ return (DB_VERIFY_BAD);
+ }
+
+	/* Update the high-water mark (what HOFFSET should be). */
+ if (offset < *himarkp)
+ *himarkp = offset;
+
+ if (is_btree) {
+ /*
+ * Check alignment; if it's unaligned, it's unsafe to
+ * manipulate this item.
+ */
+ if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) {
+ EPRINT((env, DB_STR_A("0565",
+ "Page %lu: unaligned offset %lu at page index %lu",
+ "%lu %lu %lu"), (u_long)pgno, (u_long)offset,
+ (u_long)i));
+ return (DB_VERIFY_BAD);
+ }
+
+ /*
+ * Check that the item length remains on-page.
+ */
+ bk = GET_BKEYDATA(dbp, h, i);
+
+ /*
+		 * Verify the type of the item here; we can't simply
+		 * assume it will be one of the three expected types.
+		 * An unrecognized type has no verifiable length, so
+		 * the item can't be certified as safe.
+ */
+ switch (B_TYPE(bk->type)) {
+ case B_KEYDATA:
+ len = bk->len;
+ break;
+ case B_DUPLICATE:
+ case B_OVERFLOW:
+ len = BOVERFLOW_SIZE;
+ break;
+ default:
+ EPRINT((env, DB_STR_A("0566",
+ "Page %lu: item %lu of unrecognizable type",
+ "%lu %lu"), (u_long)pgno, (u_long)i));
+ return (DB_VERIFY_BAD);
+ }
+
+ if ((size_t)(offset + len) > dbp->pgsize) {
+ EPRINT((env, DB_STR_A("0567",
+ "Page %lu: item %lu extends past page boundary",
+ "%lu %lu"), (u_long)pgno, (u_long)i));
+ return (DB_VERIFY_BAD);
+ }
+ }
+
+ if (offsetp != NULL)
+ *offsetp = offset;
+ return (0);
+}
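+
+/*
+ * A minimal sketch of how __db_vrfy_inpitem is typically driven (a
+ * hypothetical caller; the real callers are the access-method verifiers):
+ *
+ *	himark = dbp->pgsize;
+ *	for (i = 0; i < NUM_ENT(h); i++)
+ *		if ((ret = __db_vrfy_inpitem(dbp,
+ *		    h, pgno, i, 1, flags, &himark, NULL)) != 0)
+ *			return (ret);
+ */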
+
+/*
+ * __db_vrfy_duptype --
+ *	Given a page number and a set of flags to __bam_vrfy_subtree,
+ *	verify that the dup tree type is correct: it's a recno
+ * if DUPSORT is not set and a btree if it is.
+ *
+ * PUBLIC: int __db_vrfy_duptype
+ * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
+ */
+int
+__db_vrfy_duptype(dbp, vdp, pgno, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t pgno;
+ u_int32_t flags;
+{
+ ENV *env;
+ VRFY_PAGEINFO *pip;
+ int ret, isbad;
+
+ env = dbp->env;
+ isbad = 0;
+
+ if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
+ return (ret);
+
+ switch (pip->type) {
+ case P_IBTREE:
+ case P_LDUP:
+ if (!LF_ISSET(DB_ST_DUPSORT)) {
+ EPRINT((env, DB_STR_A("0568",
+ "Page %lu: sorted duplicate set in unsorted-dup database",
+ "%lu"), (u_long)pgno));
+ isbad = 1;
+ }
+ break;
+ case P_IRECNO:
+ case P_LRECNO:
+ if (LF_ISSET(DB_ST_DUPSORT)) {
+ EPRINT((env, DB_STR_A("0569",
+ "Page %lu: unsorted duplicate set in sorted-dup database",
+ "%lu"), (u_long)pgno));
+ isbad = 1;
+ }
+ break;
+ default:
+ /*
+ * If the page is entirely zeroed, its pip->type will be a lie
+ * (we assumed it was a hash page, as they're allowed to be
+ * zeroed); handle this case specially.
+ */
+ if (F_ISSET(pip, VRFY_IS_ALLZEROES))
+ ZEROPG_ERR_PRINT(env, pgno, DB_STR_P("duplicate page"));
+ else
+ EPRINT((env, DB_STR_A("0570",
+ "Page %lu: duplicate page of inappropriate type %lu",
+ "%lu %lu"), (u_long)pgno, (u_long)pip->type));
+ isbad = 1;
+ break;
+ }
+
+ if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
+ return (ret);
+ return (isbad == 1 ? DB_VERIFY_BAD : 0);
+}
+
+/*
+ * __db_salvage_duptree --
+ * Attempt to salvage a given duplicate tree, given its alleged root.
+ *
+ * The key that corresponds to this dup set has been passed to us
+ * in DBT *key. Because data items follow keys, though, it has been
+ * printed once already.
+ *
+ * The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a
+ * P_IBTREE, or a P_IRECNO. If it's an internal page, use the verifier
+ * functions to make sure it's safe; if it's not, we simply bail, and the
+ * data will have to be printed with no key later on.  If it is safe,
+ * recurse on each of its children.
+ *
+ * Whether or not it's safe, if it's a leaf page, __bam_salvage it.
+ *
+ * At all times, use the DB hanging off vdp to mark and check what we've
+ * done, so each page gets printed exactly once and we don't get caught
+ * in any cycles.
+ *
+ * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t,
+ * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t));
+ */
+int
+__db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t pgno;
+ DBT *key;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ DB_MPOOLFILE *mpf;
+ PAGE *h;
+ int ret, t_ret;
+
+ mpf = dbp->mpf;
+
+ if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno))
+ return (DB_VERIFY_BAD);
+
+ /* We have a plausible page. Try it. */
+ if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
+ return (ret);
+
+ switch (TYPE(h)) {
+ case P_IBTREE:
+ case P_IRECNO:
+ if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0)
+ goto err;
+ if ((ret = __bam_vrfy(dbp,
+ vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 ||
+ (ret = __db_salvage_markdone(vdp, pgno)) != 0)
+ goto err;
+ /*
+ * We have a known-healthy internal page. Walk it.
+ */
+ if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key,
+ handle, callback, flags)) != 0)
+ goto err;
+ break;
+ case P_LRECNO:
+ case P_LDUP:
+ if ((ret = __bam_salvage(dbp,
+ vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0)
+ goto err;
+ break;
+ default:
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+
+err: if ((t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __db_salvage_all --
+ * Salvage only the leaves we find by walking the tree. If we have subdbs,
+ * salvage each of them individually.
+ */
+static int
+__db_salvage_all(dbp, vdp, handle, callback, flags, hassubsp)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+ int *hassubsp;
+{
+ DB *pgset;
+ DBC *pgsc;
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *h;
+ VRFY_PAGEINFO *pip;
+ db_pgno_t p, meta_pgno;
+ int ret, t_ret;
+
+ *hassubsp = 0;
+
+ env = dbp->env;
+ pgset = NULL;
+ pgsc = NULL;
+ mpf = dbp->mpf;
+ h = NULL;
+ pip = NULL;
+ ret = 0;
+
+ /*
+ * Check to make sure the page is OK and find out if it contains
+ * subdatabases.
+ */
+ meta_pgno = PGNO_BASE_MD;
+ if ((t_ret = __memp_fget(mpf,
+ &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 &&
+ (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 &&
+ (t_ret = __db_salvage_pg(
+ dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 &&
+ (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0)
+ if (F_ISSET(pip, VRFY_HAS_SUBDBS))
+ *hassubsp = 1;
+ if (pip != NULL &&
+ (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+ ret = t_ret;
+ if (h != NULL) {
+ if ((t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
+ h = NULL;
+ }
+ if (ret != 0)
+ return (ret);
+
+ /* Without subdatabases, we can just dump from the meta pgno. */
+ if (*hassubsp == 0)
+ return (__db_salvage(dbp,
+ vdp, PGNO_BASE_MD, handle, callback, flags));
+
+ /*
+ * We have subdbs. Try to crack them.
+ *
+ * To do so, get a set of leaf pages in the master database, and then
+ * walk each of the valid ones, salvaging subdbs as we go. If any
+ * prove invalid, just drop them; we'll pick them up on a later pass.
+ */
+ if ((ret = __db_vrfy_pgset(env,
+ vdp->thread_info, dbp->pgsize, &pgset)) != 0)
+ goto err;
+ if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0)
+ goto err;
+ if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0)
+ goto err;
+ while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
+ if ((t_ret = __memp_fget(mpf,
+ &p, vdp->thread_info, NULL, 0, &h)) == 0 &&
+ (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 &&
+ (t_ret =
+ __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0)
+ t_ret = __db_salvage_subdbpg(
+ dbp, vdp, h, handle, callback, flags);
+ if (t_ret != 0 && ret == 0)
+ ret = t_ret;
+ if (h != NULL) {
+ if ((t_ret = __memp_fput(mpf, vdp->thread_info,
+ h, dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
+ h = NULL;
+ }
+ }
+
+ if (t_ret != DB_NOTFOUND && ret == 0)
+ ret = t_ret;
+
+err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
+ ret = t_ret;
+ if (pgset != NULL &&
+	    (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (h != NULL &&
+ (t_ret = __memp_fput(mpf,
+ vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __db_salvage_subdbpg --
+ * Given a known-good leaf page in the master database, salvage all
+ * leaf pages corresponding to each subdb.
+ */
+static int
+__db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ PAGE *master;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ BKEYDATA *bkkey, *bkdata;
+ BOVERFLOW *bo;
+ DB *pgset;
+ DBC *pgsc;
+ DBT key;
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *subpg;
+ db_indx_t i;
+ db_pgno_t meta_pgno;
+ int ret, err_ret, t_ret;
+ char *subdbname;
+ u_int32_t ovfl_bufsz;
+
+ env = dbp->env;
+ mpf = dbp->mpf;
+ ret = err_ret = 0;
+ subdbname = NULL;
+ pgsc = NULL;
+ pgset = NULL;
+ ovfl_bufsz = 0;
+
+ /*
+ * For each entry, get and salvage the set of pages
+ * corresponding to that entry.
+ */
+ for (i = 0; i < NUM_ENT(master); i += P_INDX) {
+ bkkey = GET_BKEYDATA(dbp, master, i);
+ bkdata = GET_BKEYDATA(dbp, master, i + O_INDX);
+
+ /* Get the subdatabase name. */
+ if (B_TYPE(bkkey->type) == B_OVERFLOW) {
+ /*
+ * We can, in principle anyway, have a subdb
+ * name so long it overflows. Ick.
+ */
+ bo = (BOVERFLOW *)bkkey;
+ if ((ret = __db_safe_goff(dbp, vdp, bo->pgno,
+ &key, &subdbname, &ovfl_bufsz, flags)) != 0) {
+ err_ret = DB_VERIFY_BAD;
+ continue;
+ }
+
+ /* Nul-terminate it. */
+ if (ovfl_bufsz < key.size + 1) {
+ if ((ret = __os_realloc(env,
+ key.size + 1, &subdbname)) != 0)
+ goto err;
+ ovfl_bufsz = key.size + 1;
+ }
+ subdbname[key.size] = '\0';
+ } else if (B_TYPE(bkkey->type) == B_KEYDATA) {
+ if (ovfl_bufsz < (u_int32_t)bkkey->len + 1) {
+ if ((ret = __os_realloc(env,
+ bkkey->len + 1, &subdbname)) != 0)
+ goto err;
+ ovfl_bufsz = bkkey->len + 1;
+ }
+ DB_ASSERT(env, subdbname != NULL);
+ memcpy(subdbname, bkkey->data, bkkey->len);
+ subdbname[bkkey->len] = '\0';
+ }
+
+ /* Get the corresponding pgno. */
+ if (bkdata->len != sizeof(db_pgno_t)) {
+ err_ret = DB_VERIFY_BAD;
+ continue;
+ }
+ memcpy(&meta_pgno,
+ (db_pgno_t *)bkdata->data, sizeof(db_pgno_t));
+
+ /*
+ * Subdatabase meta pgnos are stored in network byte
+ * order for cross-endian compatibility. Swap if appropriate.
+ */
+ DB_NTOHL_SWAP(env, &meta_pgno);
+
+ /* If we can't get the subdb meta page, just skip the subdb. */
+ if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf,
+ &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) {
+ err_ret = ret;
+ continue;
+ }
+
+ /*
+		 * Verify the subdatabase meta page.  This serves two purposes.
+		 * First, if it's bad, we have no choice but to skip the subdb
+		 * and let the pages just get printed on a later pass.  Second,
+		 * the access-method-specific meta verification routines record
+		 * the state information (such as the presence of dups)
+		 * that we need for __db_prheader().
+ */
+ if ((ret =
+ __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) {
+ err_ret = ret;
+ (void)__memp_fput(mpf,
+ vdp->thread_info, subpg, dbp->priority);
+ continue;
+ }
+ switch (TYPE(subpg)) {
+ case P_BTREEMETA:
+ if ((ret = __bam_vrfy_meta(dbp,
+ vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) {
+ err_ret = ret;
+ (void)__memp_fput(mpf,
+ vdp->thread_info, subpg, dbp->priority);
+ continue;
+ }
+ break;
+ case P_HASHMETA:
+ if ((ret = __ham_vrfy_meta(dbp,
+ vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) {
+ err_ret = ret;
+ (void)__memp_fput(mpf,
+ vdp->thread_info, subpg, dbp->priority);
+ continue;
+ }
+ break;
+		default:
+			/* This isn't an appropriate page; skip this subdb. */
+			err_ret = DB_VERIFY_BAD;
+			(void)__memp_fput(mpf,
+			    vdp->thread_info, subpg, dbp->priority);
+			continue;
+ }
+
+ if ((ret = __memp_fput(mpf,
+ vdp->thread_info, subpg, dbp->priority)) != 0) {
+ err_ret = ret;
+ continue;
+ }
+
+ /* Print a subdatabase header. */
+ if ((ret = __db_prheader(dbp,
+ subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0)
+ goto err;
+
+ /* Salvage meta_pgno's tree. */
+ if ((ret = __db_salvage(dbp,
+ vdp, meta_pgno, handle, callback, flags)) != 0)
+ err_ret = ret;
+
+ /* Print a subdatabase footer. */
+ if ((ret = __db_prfooter(handle, callback)) != 0)
+ goto err;
+ }
+
+err: if (subdbname)
+ __os_free(env, subdbname);
+
+ if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
+ ret = t_ret;
+
+ if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
+ ret = t_ret;
+
+ if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0)
+ return (t_ret);
+
+ return ((err_ret != 0) ? err_ret : ret);
+}
+
+/*
+ * __db_salvage --
+ * Given a meta page number, salvage all data from leaf pages found by
+ * walking the meta page's tree.
+ */
+static int
+__db_salvage(dbp, vdp, meta_pgno, handle, callback, flags)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t meta_pgno;
+ void *handle;
+ int (*callback) __P((void *, const void *));
+ u_int32_t flags;
+{
+ DB *pgset;
+ DBC *dbc, *pgsc;
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *subpg;
+ db_pgno_t p;
+ int err_ret, ret, t_ret;
+
+ env = dbp->env;
+ mpf = dbp->mpf;
+ err_ret = ret = t_ret = 0;
+ pgsc = NULL;
+ pgset = NULL;
+ dbc = NULL;
+
+ if ((ret = __db_vrfy_pgset(env,
+ vdp->thread_info, dbp->pgsize, &pgset)) != 0)
+ goto err;
+
+ /* Get all page numbers referenced from this meta page. */
+ if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
+ flags, pgset)) != 0) {
+ err_ret = ret;
+ goto err;
+ }
+
+ if ((ret = __db_cursor(pgset,
+ vdp->thread_info, NULL, &pgsc, 0)) != 0)
+ goto err;
+
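+	/*
+	 * Queue extent pages are fetched through __qam_fget, which works
+	 * through a database cursor rather than the mpool handle alone, so
+	 * open one up front.
+	 */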
+ if (dbp->type == DB_QUEUE &&
+ (ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0)
+ goto err;
+
+ /* Salvage every page in pgset. */
+ while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
+ if (dbp->type == DB_QUEUE) {
+#ifdef HAVE_QUEUE
+ ret = __qam_fget(dbc, &p, 0, &subpg);
+#else
+ ret = __db_no_queue_am(env);
+#endif
+			/*
+			 * Don't report an error for pages not found in a
+			 * queue.  The pgset is a best guess; it doesn't know
+			 * about deleted extents, which leads to this error.
+			 */
+ if (ret == ENOENT || ret == DB_PAGE_NOTFOUND)
+ continue;
+ } else
+ ret = __memp_fget(mpf,
+ &p, vdp->thread_info, NULL, 0, &subpg);
+ if (ret != 0) {
+ err_ret = ret;
+ continue;
+ }
+
+ if ((ret = __db_salvage_pg(dbp, vdp, p, subpg,
+ handle, callback, flags)) != 0)
+ err_ret = ret;
+
+ if (dbp->type == DB_QUEUE)
+#ifdef HAVE_QUEUE
+ ret = __qam_fput(dbc, p, subpg, dbp->priority);
+#else
+ ret = __db_no_queue_am(env);
+#endif
+ else
+ ret = __memp_fput(mpf,
+ vdp->thread_info, subpg, dbp->priority);
+ if (ret != 0)
+ err_ret = ret;
+ }
+
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+
+err:
+ if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0)
+ ret = t_ret;
+ if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
+ ret = t_ret;
+ if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
+ ret = t_ret;
+
+ return ((err_ret != 0) ? err_ret : ret);
+}
+
+/*
+ * __db_meta2pgset --
+ * Given a known-safe meta page number, return the set of pages
+ * corresponding to the database it represents. Return DB_VERIFY_BAD if
+ * it's not a suitable meta page or is invalid.
+ */
+static int
+__db_meta2pgset(dbp, vdp, pgno, flags, pgset)
+ DB *dbp;
+ VRFY_DBINFO *vdp;
+ db_pgno_t pgno;
+ u_int32_t flags;
+ DB *pgset;
+{
+ DB_MPOOLFILE *mpf;
+ PAGE *h;
+ int ret, t_ret;
+
+ mpf = dbp->mpf;
+
+ if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
+ return (ret);
+
+ switch (TYPE(h)) {
+ case P_BTREEMETA:
+ ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
+ break;
+ case P_HASHMETA:
+ ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
+ break;
+ case P_HEAPMETA:
+ ret = __heap_meta2pgset(dbp, vdp, (HEAPMETA *)h, pgset);
+ break;
+ case P_QAMMETA:
+#ifdef HAVE_QUEUE
+ ret = __qam_meta2pgset(dbp, vdp, pgset);
+ break;
+#endif
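+		/*
+		 * Without HAVE_QUEUE, a P_QAMMETA page falls through to the
+		 * default case and is treated as an unsuitable meta page.
+		 */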
+ default:
+ ret = DB_VERIFY_BAD;
+ break;
+ }
+
+ if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
+ return (t_ret);
+ return (ret);
+}
+
+/*
+ * __db_guesspgsize --
+ * Try to guess what the pagesize is if the one on the meta page
+ * and the one in the db are invalid.
+ */
+static u_int
+__db_guesspgsize(env, fhp)
+ ENV *env;
+ DB_FH *fhp;
+{
+ db_pgno_t i;
+ size_t nr;
+ u_int32_t guess;
+ u_int8_t type;
+
+ for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
+ /*
+ * We try to read three pages ahead after the first one
+ * and make sure we have plausible types for all of them.
+ * If the seeks fail, continue with a smaller size;
+ * we're probably just looking past the end of the database.
+		 * If they succeed and the types are reasonable, also continue
+		 * with a smaller size; we may be looking at pages N,
+ * 2N, and 3N for some N > 1.
+ *
+		 * As soon as we hit an invalid type, we stop and return
+		 * the previous guess; that larger size was probably the
+		 * page size.
+ */
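+		/*
+		 * Worked example (a sketch, not an exact trace): suppose the
+		 * true page size is 4096.  Every power-of-two guess of 4096
+		 * or larger lands its probes on real page-header offsets (or
+		 * past the end of the file), so the outer loop keeps
+		 * shrinking the guess.  At a guess of 2048, the probe for
+		 * "page 1" reads the type byte from the middle of true page
+		 * 0, which is very unlikely to hold a valid page type, so we
+		 * return 2048 << 1 == 4096.
+		 */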
+ for (i = 1; i <= 3; i++) {
+ if (__os_seek(
+ env, fhp, i, guess, SSZ(DBMETA, type)) != 0)
+ break;
+ if (__os_read(env,
+ fhp, &type, 1, &nr) != 0 || nr == 0)
+ break;
+ if (type == P_INVALID || type >= P_PAGETYPE_MAX)
+ return (guess << 1);
+ }
+ }
+
+ /*
+ * If we're just totally confused--the corruption takes up most of the
+ * beginning pages of the database--go with the default size.
+ */
+ return (DB_DEF_IOSIZE);
+}