author    Lorry <lorry@roadtrain.codethink.co.uk>  2012-07-20 20:00:05 +0100
committer Lorry <lorry@roadtrain.codethink.co.uk>  2012-07-20 20:00:05 +0100
commit    3ef782d3745ea8f25a3151561a3cfb882190210e (patch)
tree      86b9c2f5fde051dd0bced99b3fc9f5a3ba08db69 /src/env
download  berkeleydb-3ef782d3745ea8f25a3151561a3cfb882190210e.tar.gz
Tarball conversion
Diffstat (limited to 'src/env')
-rw-r--r--  src/env/env_alloc.c      759
-rw-r--r--  src/env/env_backup.c     166
-rw-r--r--  src/env/env_config.c     737
-rw-r--r--  src/env/env_failchk.c    558
-rw-r--r--  src/env/env_file.c       128
-rw-r--r--  src/env/env_globals.c     66
-rw-r--r--  src/env/env_method.c    1918
-rw-r--r--  src/env/env_name.c       285
-rw-r--r--  src/env/env_open.c      1262
-rw-r--r--  src/env/env_recover.c   1093
-rw-r--r--  src/env/env_region.c    1497
-rw-r--r--  src/env/env_register.c   730
-rw-r--r--  src/env/env_sig.c        201
-rw-r--r--  src/env/env_stat.c       879
14 files changed, 10279 insertions, 0 deletions
diff --git a/src/env/env_alloc.c b/src/env/env_alloc.c
new file mode 100644
index 00000000..700bfb27
--- /dev/null
+++ b/src/env/env_alloc.c
@@ -0,0 +1,759 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * Implement shared memory region allocation. The initial list is a single
+ * memory "chunk" which is carved up as memory is requested. Chunks are
+ * coalesced when free'd. We maintain two types of linked-lists: a list of
+ * all chunks sorted by address, and a set of lists with free chunks sorted
+ * by size.
+ *
+ * The ALLOC_LAYOUT structure is the governing structure for the allocator.
+ *
+ * The ALLOC_ELEMENT structure is the structure that describes any single
+ * chunk of memory, and is immediately followed by the user's memory.
+ *
+ * The internal memory chunks are always aligned to a uintmax_t boundary so
+ * we don't drop core accessing the fields of the ALLOC_ELEMENT structure.
+ *
+ * The memory chunks returned to the user are aligned to a uintmax_t boundary.
+ * This is enforced by terminating the ALLOC_ELEMENT structure with a uintmax_t
+ * field as that immediately precedes the user's memory. Any caller needing
+ * more than uintmax_t alignment is responsible for doing alignment themselves.
+ */
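A minimal caller sketch may help orient the reader. The helper below is hypothetical (it is not part of this file) and assumes a shared region whose allocation mutex has been configured; it uses only the routines defined further down and the MUTEX_LOCK/MUTEX_UNLOCK macros already used elsewhere in this commit.

/*
 * Hypothetical caller: allocate and free 512 bytes from a shared region.
 */
static int
region_alloc_example(env, infop)
	ENV *env;
	REGINFO *infop;
{
	void *p;
	int ret;

	/* __env_alloc() requires the region's allocation mutex. */
	MUTEX_LOCK(env, infop->mtx_alloc);
	ret = __env_alloc(infop, 512, &p);
	MUTEX_UNLOCK(env, infop->mtx_alloc);
	if (ret != 0)
		return (ret);		/* Usually ENOMEM. */

	/* ... use the uintmax_t-aligned 512 bytes at p ... */

	MUTEX_LOCK(env, infop->mtx_alloc);
	__env_alloc_free(infop, p);
	MUTEX_UNLOCK(env, infop->mtx_alloc);
	return (0);
}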
+
+typedef SH_TAILQ_HEAD(__sizeq) SIZEQ_HEAD;
+
+typedef struct __alloc_layout {
+ SH_TAILQ_HEAD(__addrq) addrq; /* Sorted by address */
+
+ /*
+ * A perfect Berkeley DB application does little allocation because
+ * most things are allocated on startup and never free'd. This is
+ * true even for the cache, because we don't free and re-allocate
+ * the memory associated with a cache buffer when swapping a page
+ * in memory for a page on disk -- unless the page is changing size.
+ * The latter problem is why we have multiple size queues. If the
+ * application's working set fits in cache, it's not a problem. If
+ * the application's working set doesn't fit in cache, but all of
+ * the databases have the same size pages, it's still not a problem.
+ * If the application's working set doesn't fit in cache, and its
+ * databases have different page sizes, we can end up walking a lot
+ * of 512B chunk allocations looking for an available 64KB chunk.
+ *
+ * So, we keep a set of queues, where we expect to find a chunk of
+ * roughly the right size at the front of the list. The first queue
+ * is chunks <= 1024, the second is <= 2048, and so on. With 11
+ * queues, we have separate queues for chunks up to 1MB.
+ */
+#define DB_SIZE_Q_COUNT 11
+ SIZEQ_HEAD sizeq[DB_SIZE_Q_COUNT]; /* Sorted by size */
+#ifdef HAVE_STATISTICS
+ u_int32_t pow2_size[DB_SIZE_Q_COUNT];
+#endif
+
+#ifdef HAVE_STATISTICS
+ u_int32_t success; /* Successful allocations */
+ u_int32_t failure; /* Failed allocations */
+ u_int32_t freed; /* Free calls */
+ u_int32_t longest; /* Longest chain walked */
+#endif
+ uintmax_t unused; /* Guarantee alignment */
+} ALLOC_LAYOUT;
+
+typedef struct __alloc_element {
+ SH_TAILQ_ENTRY addrq; /* List by address */
+ SH_TAILQ_ENTRY sizeq; /* List by size */
+
+ /*
+ * The "len" field is the total length of the chunk, not the size
+ * available to the caller. Use a uintmax_t to guarantee that the
+ * size of this struct will be aligned correctly.
+ */
+ uintmax_t len; /* Chunk length */
+
+ /*
+ * The "ulen" field is the length returned to the caller.
+ *
+ * Set to 0 if the chunk is not currently in use.
+ */
+ uintmax_t ulen; /* User's length */
+} ALLOC_ELEMENT;
+
+/*
+ * If the chunk can be split into two pieces, with the fragment holding at
+ * least 64 bytes of memory, we divide the chunk into two parts.
+ */
+#define SHALLOC_FRAGMENT (sizeof(ALLOC_ELEMENT) + 64)
+
+/* Macro to find the appropriate queue for a specific size chunk. */
+#undef SET_QUEUE_FOR_SIZE
+#define SET_QUEUE_FOR_SIZE(head, q, i, len) do { \
+ for (i = 0; i < DB_SIZE_Q_COUNT; ++i) { \
+ q = &(head)->sizeq[i]; \
+ if ((len) <= (u_int64_t)1024 << i) \
+ break; \
+ } \
+} while (0)
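To make the bucket selection concrete, here is a worked trace of the macro above with illustrative request sizes (the numbers are examples, not values taken from the source):

/*
 * SET_QUEUE_FOR_SIZE picks the smallest i with len <= 1024 << i:
 *
 *	len =       900	-> i = 0, q = &head->sizeq[0]	(chunks <= 1KB)
 *	len =     3,000	-> i = 2, q = &head->sizeq[2]	(chunks <= 4KB)
 *	len =   120,000	-> i = 7, q = &head->sizeq[7]	(chunks <= 128KB)
 *	len = 2,000,000	-> the loop never breaks: i ends up equal to
 *			   DB_SIZE_Q_COUNT and q is left at the last
 *			   (<= 1MB) queue, which is then the only queue
 *			   searched by __env_alloc().
 */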
+
+static void __env_size_insert __P((ALLOC_LAYOUT *, ALLOC_ELEMENT *));
+
+/*
+ * __env_alloc_init --
+ * Initialize the area as one large chunk.
+ *
+ * PUBLIC: void __env_alloc_init __P((REGINFO *, size_t));
+ */
+void
+__env_alloc_init(infop, size)
+ REGINFO *infop;
+ size_t size;
+{
+ ALLOC_ELEMENT *elp;
+ ALLOC_LAYOUT *head;
+ ENV *env;
+ u_int i;
+
+ env = infop->env;
+
+ /* No initialization needed for heap memory regions. */
+ if (F_ISSET(env, ENV_PRIVATE))
+ return;
+
+ /*
+ * The first chunk of memory is the ALLOC_LAYOUT structure.
+ */
+ head = infop->head;
+ memset(head, 0, sizeof(*head));
+ SH_TAILQ_INIT(&head->addrq);
+ for (i = 0; i < DB_SIZE_Q_COUNT; ++i)
+ SH_TAILQ_INIT(&head->sizeq[i]);
+ COMPQUIET(head->unused, 0);
+
+ /*
+ * The rest of the memory is the first available chunk.
+ */
+ elp = (ALLOC_ELEMENT *)((u_int8_t *)head + sizeof(ALLOC_LAYOUT));
+ elp->len = size - sizeof(ALLOC_LAYOUT);
+ elp->ulen = 0;
+
+ SH_TAILQ_INSERT_HEAD(&head->addrq, elp, addrq, __alloc_element);
+ SH_TAILQ_INSERT_HEAD(
+ &head->sizeq[DB_SIZE_Q_COUNT - 1], elp, sizeq, __alloc_element);
+}
+
+/*
+ * The length, the ALLOC_ELEMENT structure and an optional guard byte,
+ * rounded up to standard alignment.
+ */
+#ifdef DIAGNOSTIC
+#define DB_ALLOC_SIZE(len) \
+ (size_t)DB_ALIGN((len) + sizeof(ALLOC_ELEMENT) + 1, sizeof(uintmax_t))
+#else
+#define DB_ALLOC_SIZE(len) \
+ (size_t)DB_ALIGN((len) + sizeof(ALLOC_ELEMENT), sizeof(uintmax_t))
+#endif
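As a rough worked example of the macro above, assuming for illustration a 64-bit build where sizeof(ALLOC_ELEMENT) is 48 and sizeof(uintmax_t) is 8:

/*
 * DB_ALLOC_SIZE(100) without DIAGNOSTIC:
 *	DB_ALIGN(100 + 48, 8) = DB_ALIGN(148, 8) = 152 bytes of region space.
 * DB_ALLOC_SIZE(100) with DIAGNOSTIC (one extra guard byte):
 *	DB_ALIGN(100 + 48 + 1, 8) = DB_ALIGN(149, 8) = 152 bytes as well;
 *	the guard byte often fits in the alignment padding.
 */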
+
+/*
+ * __env_alloc_overhead --
+ * Return the overhead needed for an allocation.
+ *
+ * PUBLIC: size_t __env_alloc_overhead __P((void));
+ */
+size_t
+__env_alloc_overhead()
+{
+ return (sizeof(ALLOC_ELEMENT));
+}
+
+/*
+ * __env_alloc_size --
+ * Return the space needed for an allocation, including alignment.
+ *
+ * PUBLIC: size_t __env_alloc_size __P((size_t));
+ */
+size_t
+__env_alloc_size(len)
+ size_t len;
+{
+ return (DB_ALLOC_SIZE(len));
+}
+
+/*
+ * __env_alloc --
+ * Allocate space from the shared region.
+ *
+ * PUBLIC: int __env_alloc __P((REGINFO *, size_t, void *));
+ */
+int
+__env_alloc(infop, len, retp)
+ REGINFO *infop;
+ size_t len;
+ void *retp;
+{
+ SIZEQ_HEAD *q;
+ ALLOC_ELEMENT *elp, *frag, *elp_tmp;
+ ALLOC_LAYOUT *head;
+ ENV *env;
+ REGION_MEM *mem;
+ REGINFO *envinfop;
+ size_t total_len;
+ u_int8_t *p;
+ u_int i;
+ int ret;
+#ifdef HAVE_STATISTICS
+ u_int32_t st_search;
+#endif
+ env = infop->env;
+ *(void **)retp = NULL;
+#ifdef HAVE_MUTEX_SUPPORT
+ MUTEX_REQUIRED(env, infop->mtx_alloc);
+#endif
+
+ PERFMON3(env, mpool, env_alloc, len, infop->id, infop->type);
+ /*
+ * In a heap-backed environment, we call malloc for additional space.
+ * (Malloc must return memory correctly aligned for our use.)
+ *
+ * In a heap-backed environment, memory is laid out as follows:
+ *
+ * { uintmax_t total-length } { user-memory } { guard-byte }
+ */
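For example (an illustration, assuming sizeof(uintmax_t) is 8, with neither REGION_TRACKED nor DIAGNOSTIC set):

	/*
	 * A 100-byte request in a private environment mallocs 108 bytes:
	 * the first 8 bytes record the total length (108), and the caller
	 * is handed the address 8 bytes in.
	 */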
+ if (F_ISSET(env, ENV_PRIVATE)) {
+ /*
+ * If we are shared then we must track the allocation
+ * in the main environment region.
+ */
+ if (F_ISSET(infop, REGION_SHARED))
+ envinfop = env->reginfo;
+ else
+ envinfop = infop;
+ /*
+ * We need an additional uintmax_t to hold the length (and
+ * keep the buffer aligned on 32-bit systems).
+ */
+ len += sizeof(uintmax_t);
+ if (F_ISSET(infop, REGION_TRACKED))
+ len += sizeof(REGION_MEM);
+
+#ifdef DIAGNOSTIC
+ /* Plus one byte for the guard byte. */
+ ++len;
+#endif
+ /* Check if we're over the limit. */
+ if (envinfop->max_alloc != 0 &&
+ envinfop->allocated + len > envinfop->max_alloc)
+ return (ENOMEM);
+
+ /* Allocate the space. */
+ if ((ret = __os_malloc(env, len, &p)) != 0)
+ return (ret);
+ infop->allocated += len;
+ if (infop != envinfop)
+ envinfop->allocated += len;
+
+ *(uintmax_t *)p = len;
+#ifdef DIAGNOSTIC
+ p[len - 1] = GUARD_BYTE;
+#endif
+ if (F_ISSET(infop, REGION_TRACKED)) {
+ mem = (REGION_MEM *)(p + sizeof(uintmax_t));
+ mem->next = infop->mem;
+ infop->mem = mem;
+ p += sizeof(mem);
+ }
+ *(void **)retp = p + sizeof(uintmax_t);
+ return (0);
+ }
+
+ head = infop->head;
+ total_len = DB_ALLOC_SIZE(len);
+
+ /* Find the first size queue that could satisfy the request. */
+ COMPQUIET(q, NULL);
+#ifdef HAVE_MMAP_EXTEND
+retry:
+#endif
+ SET_QUEUE_FOR_SIZE(head, q, i, total_len);
+
+#ifdef HAVE_STATISTICS
+ if (i >= DB_SIZE_Q_COUNT)
+ i = DB_SIZE_Q_COUNT - 1;
+ ++head->pow2_size[i]; /* Note the size of the request. */
+#endif
+
+ /*
+ * Search this queue, and, if necessary, queues larger than this queue,
+ * looking for a chunk we can use.
+ */
+ STAT(st_search = 0);
+ for (elp = NULL;; ++q) {
+ SH_TAILQ_FOREACH(elp_tmp, q, sizeq, __alloc_element) {
+ STAT(++st_search);
+
+ /*
+ * Chunks are sorted from largest to smallest -- if
+ * this chunk is less than what we need, no chunk
+ * further down the list will be large enough.
+ */
+ if (elp_tmp->len < total_len)
+ break;
+
+ /*
+ * This chunk will do... maybe there's a better one,
+ * but this one will do.
+ */
+ elp = elp_tmp;
+
+ /*
+ * We might have many chunks of the same size. Stop
+ * looking if we won't fragment memory by picking the
+ * current one.
+ */
+ if (elp_tmp->len - total_len <= SHALLOC_FRAGMENT)
+ break;
+ }
+ if (elp != NULL || ++i >= DB_SIZE_Q_COUNT)
+ break;
+ }
+
+#ifdef HAVE_STATISTICS
+ if (head->longest < st_search) {
+ head->longest = st_search;
+ STAT_PERFMON3(env,
+ mpool, longest_search, len, infop->id, st_search);
+ }
+#endif
+
+ /*
+ * If we don't find an element of the right size, try to extend
+	 * the region; if that is not possible, we are done.
+ */
+ if (elp == NULL) {
+ ret = ENOMEM;
+#ifdef HAVE_MMAP_EXTEND
+ if (infop->rp->size < infop->rp->max &&
+ (ret = __env_region_extend(env, infop)) == 0)
+ goto retry;
+#endif
+ STAT_INC_VERB(env, mpool, fail, head->failure, len, infop->id);
+ return (ret);
+ }
+ STAT_INC_VERB(env, mpool, alloc, head->success, len, infop->id);
+
+ /* Pull the chunk off of the size queue. */
+ SH_TAILQ_REMOVE(q, elp, sizeq, __alloc_element);
+
+ if (elp->len - total_len > SHALLOC_FRAGMENT) {
+ frag = (ALLOC_ELEMENT *)((u_int8_t *)elp + total_len);
+ frag->len = elp->len - total_len;
+ frag->ulen = 0;
+
+ elp->len = total_len;
+
+ /* The fragment follows the chunk on the address queue. */
+ SH_TAILQ_INSERT_AFTER(
+ &head->addrq, elp, frag, addrq, __alloc_element);
+
+ /* Insert the frag into the correct size queue. */
+ __env_size_insert(head, frag);
+ }
+
+ p = (u_int8_t *)elp + sizeof(ALLOC_ELEMENT);
+ elp->ulen = len;
+#ifdef DIAGNOSTIC
+ p[len] = GUARD_BYTE;
+#endif
+ *(void **)retp = p;
+
+ return (0);
+}
+
+/*
+ * __env_alloc_free --
+ * Free space into the shared region.
+ *
+ * PUBLIC: void __env_alloc_free __P((REGINFO *, void *));
+ */
+void
+__env_alloc_free(infop, ptr)
+ REGINFO *infop;
+ void *ptr;
+{
+ ALLOC_ELEMENT *elp, *elp_tmp;
+ ALLOC_LAYOUT *head;
+ ENV *env;
+ SIZEQ_HEAD *q;
+ size_t len;
+ u_int8_t i, *p;
+
+ env = infop->env;
+
+ /* In a private region, we call free. */
+ if (F_ISSET(env, ENV_PRIVATE)) {
+ /* Find the start of the memory chunk and its length. */
+ p = (u_int8_t *)((uintmax_t *)ptr - 1);
+ len = (size_t)*(uintmax_t *)p;
+
+ infop->allocated -= len;
+ if (F_ISSET(infop, REGION_SHARED))
+ env->reginfo->allocated -= len;
+
+#ifdef DIAGNOSTIC
+ /* Check the guard byte. */
+ DB_ASSERT(env, p[len - 1] == GUARD_BYTE);
+
+ /* Trash the memory chunk. */
+ memset(p, CLEAR_BYTE, len);
+#endif
+ __os_free(env, p);
+ return;
+ }
+
+#ifdef HAVE_MUTEX_SUPPORT
+ MUTEX_REQUIRED(env, infop->mtx_alloc);
+#endif
+
+ head = infop->head;
+
+ p = ptr;
+ elp = (ALLOC_ELEMENT *)(p - sizeof(ALLOC_ELEMENT));
+
+ STAT_INC_VERB(env, mpool, free, head->freed, elp->ulen, infop->id);
+
+#ifdef DIAGNOSTIC
+ /* Check the guard byte. */
+ DB_ASSERT(env, p[elp->ulen] == GUARD_BYTE);
+
+ /* Trash the memory chunk. */
+ memset(p, CLEAR_BYTE, (size_t)elp->len - sizeof(ALLOC_ELEMENT));
+#endif
+
+ /* Mark the memory as no longer in use. */
+ elp->ulen = 0;
+
+ /*
+ * Try and merge this chunk with chunks on either side of it. Two
+ * chunks can be merged if they're contiguous and not in use.
+ */
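A concrete, purely illustrative scenario for the two merges that follow:

	/*
	 * If the address queue holds three contiguous chunks
	 * [A: free][B: in use][C: free] and B is the chunk being freed
	 * here, the first merge folds B into A and the second folds C
	 * into the result, leaving one free chunk that is re-inserted
	 * on the appropriate size queue.
	 */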
+ if ((elp_tmp =
+ SH_TAILQ_PREV(&head->addrq, elp, addrq, __alloc_element)) != NULL &&
+ elp_tmp->ulen == 0 &&
+ (u_int8_t *)elp_tmp + elp_tmp->len == (u_int8_t *)elp) {
+ /*
+ * If we're merging the entry into a previous entry, remove the
+ * current entry from the addr queue and the previous entry from
+ * its size queue, and merge.
+ */
+ SH_TAILQ_REMOVE(&head->addrq, elp, addrq, __alloc_element);
+ SET_QUEUE_FOR_SIZE(head, q, i, elp_tmp->len);
+ SH_TAILQ_REMOVE(q, elp_tmp, sizeq, __alloc_element);
+
+ elp_tmp->len += elp->len;
+ elp = elp_tmp;
+ }
+ if ((elp_tmp = SH_TAILQ_NEXT(elp, addrq, __alloc_element)) != NULL &&
+ elp_tmp->ulen == 0 &&
+ (u_int8_t *)elp + elp->len == (u_int8_t *)elp_tmp) {
+ /*
+ * If we're merging the current entry into a subsequent entry,
+ * remove the subsequent entry from the addr and size queues
+ * and merge.
+ */
+ SH_TAILQ_REMOVE(&head->addrq, elp_tmp, addrq, __alloc_element);
+ SET_QUEUE_FOR_SIZE(head, q, i, elp_tmp->len);
+ SH_TAILQ_REMOVE(q, elp_tmp, sizeq, __alloc_element);
+
+ elp->len += elp_tmp->len;
+ }
+
+ /* Insert in the correct place in the size queues. */
+ __env_size_insert(head, elp);
+}
+
+/*
+ * __env_alloc_extend --
+ * Extend a previously allocated chunk at the end of a region.
+ *
+ * PUBLIC: int __env_alloc_extend __P((REGINFO *, void *, size_t *));
+ */
+int
+__env_alloc_extend(infop, ptr, lenp)
+ REGINFO *infop;
+ void *ptr;
+ size_t *lenp;
+{
+ ALLOC_ELEMENT *elp, *elp_tmp;
+ ALLOC_LAYOUT *head;
+ ENV *env;
+ SIZEQ_HEAD *q;
+ size_t len, tlen;
+ u_int8_t i, *p;
+ int ret;
+
+ env = infop->env;
+
+ DB_ASSERT(env, !F_ISSET(env, ENV_PRIVATE));
+
+#ifdef HAVE_MUTEX_SUPPORT
+ MUTEX_REQUIRED(env, infop->mtx_alloc);
+#endif
+
+ head = infop->head;
+
+ p = ptr;
+ len = *lenp;
+ elp = (ALLOC_ELEMENT *)(p - sizeof(ALLOC_ELEMENT));
+#ifdef DIAGNOSTIC
+ /* Check the guard byte. */
+ DB_ASSERT(env, p[elp->ulen] == GUARD_BYTE);
+#endif
+
+ /* See if there is anything left in the region. */
+again: if ((elp_tmp = SH_TAILQ_NEXT(elp, addrq, __alloc_element)) != NULL &&
+ elp_tmp->ulen == 0 &&
+ (u_int8_t *)elp + elp->len == (u_int8_t *)elp_tmp) {
+ /*
+ * If we're merging the current entry into a subsequent entry,
+ * remove the subsequent entry from the addr and size queues
+ * and merge.
+ */
+ SH_TAILQ_REMOVE(&head->addrq, elp_tmp, addrq, __alloc_element);
+ SET_QUEUE_FOR_SIZE(head, q, i, elp_tmp->len);
+ SH_TAILQ_REMOVE(q, elp_tmp, sizeq, __alloc_element);
+ if (elp_tmp->len < len + SHALLOC_FRAGMENT) {
+ elp->len += elp_tmp->len;
+ if (elp_tmp->len < len)
+ len -= (size_t)elp_tmp->len;
+ else
+ len = 0;
+ } else {
+ tlen = (size_t)elp_tmp->len;
+ elp_tmp = (ALLOC_ELEMENT *) ((u_int8_t *)elp_tmp + len);
+ elp_tmp->len = tlen - len;
+ elp_tmp->ulen = 0;
+ elp->len += len;
+ len = 0;
+
+			/* The fragment follows the chunk on the address queue. */
+ SH_TAILQ_INSERT_AFTER(
+ &head->addrq, elp, elp_tmp, addrq, __alloc_element);
+
+ /* Insert the frag into the correct size queue. */
+ __env_size_insert(head, elp_tmp);
+ }
+ } else if (elp_tmp != NULL) {
+ __db_errx(env, DB_STR("1583", "block not at end of region"));
+ return (__env_panic(env, EINVAL));
+ }
+ if (len == 0)
+ goto done;
+
+ if ((ret = __env_region_extend(env, infop)) != 0) {
+ if (ret != ENOMEM)
+ return (ret);
+ goto done;
+ }
+ goto again;
+
+done: elp->ulen = elp->len - sizeof(ALLOC_ELEMENT);
+#ifdef DIAGNOSTIC
+ elp->ulen -= sizeof(uintmax_t);
+ /* There was room for the guard byte in the chunk that came in. */
+ p[elp->ulen] = GUARD_BYTE;
+#endif
+ *lenp -= len;
+ infop->allocated += *lenp;
+ if (F_ISSET(infop, REGION_SHARED))
+ env->reginfo->allocated += *lenp;
+ return (0);
+}
+
+/*
+ * __env_size_insert --
+ * Insert into the correct place in the size queues.
+ */
+static void
+__env_size_insert(head, elp)
+ ALLOC_LAYOUT *head;
+ ALLOC_ELEMENT *elp;
+{
+ SIZEQ_HEAD *q;
+ ALLOC_ELEMENT *elp_tmp;
+ u_int i;
+
+ /* Find the appropriate queue for the chunk. */
+ SET_QUEUE_FOR_SIZE(head, q, i, elp->len);
+
+ /* Find the correct slot in the size queue. */
+ SH_TAILQ_FOREACH(elp_tmp, q, sizeq, __alloc_element)
+ if (elp->len >= elp_tmp->len)
+ break;
+ if (elp_tmp == NULL)
+ SH_TAILQ_INSERT_TAIL(q, elp, sizeq);
+ else
+ SH_TAILQ_INSERT_BEFORE(q, elp_tmp, elp, sizeq, __alloc_element);
+}
+
+/*
+ * __env_region_extend --
+ * Extend a region.
+ *
+ * PUBLIC: int __env_region_extend __P((ENV *, REGINFO *));
+ */
+int
+__env_region_extend(env, infop)
+ ENV *env;
+ REGINFO *infop;
+{
+ ALLOC_ELEMENT *elp;
+ REGION *rp;
+ int ret;
+
+ DB_ASSERT(env, !F_ISSET(env, ENV_PRIVATE));
+
+ ret = 0;
+ rp = infop->rp;
+ if (rp->size >= rp->max)
+ return (ENOMEM);
+ elp = (ALLOC_ELEMENT *)((u_int8_t *)infop->addr + rp->size);
+ if (rp->size + rp->alloc > rp->max)
+ rp->alloc = rp->max - rp->size;
+ rp->size += rp->alloc;
+ rp->size = (size_t)ALIGNP_INC(rp->size, sizeof(size_t));
+ if (rp->max - rp->size <= SHALLOC_FRAGMENT)
+ rp->size = rp->max;
+ if (infop->fhp &&
+ (ret = __db_file_extend(env, infop->fhp, rp->size)) != 0)
+ return (ret);
+ elp->len = rp->alloc;
+ elp->ulen = 0;
+#ifdef DIAGNOSTIC
+ *(u_int8_t *)(elp+1) = GUARD_BYTE;
+#endif
+
+ SH_TAILQ_INSERT_TAIL(&((ALLOC_LAYOUT *)infop->head)->addrq, elp, addrq);
+ __env_alloc_free(infop, elp + 1);
+ if (rp->alloc < MEGABYTE)
+ rp->alloc += rp->size;
+ if (rp->alloc > MEGABYTE)
+ rp->alloc = MEGABYTE;
+ return (ret);
+}
+
+/*
+ * __env_elem_size --
+ * Return the size of an allocated element.
+ * PUBLIC: uintmax_t __env_elem_size __P((ENV *, void *));
+ */
+uintmax_t
+__env_elem_size(env, p)
+ ENV *env;
+ void *p;
+{
+ ALLOC_ELEMENT *elp;
+ uintmax_t size;
+
+ if (F_ISSET(env, ENV_PRIVATE)) {
+ size = *((uintmax_t *)p - 1);
+ size -= sizeof(uintmax_t);
+ } else {
+ elp = (ALLOC_ELEMENT *)((u_int8_t *)p - sizeof(ALLOC_ELEMENT));
+ size = elp->ulen;
+ }
+ return (size);
+}
+
+/*
+ * __env_get_chunk --
+ * Return the next chunk allocated in a private region.
+ * PUBLIC: void * __env_get_chunk __P((REGINFO *, void **, uintmax_t *));
+ */
+void *
+__env_get_chunk(infop, nextp, sizep)
+ REGINFO *infop;
+ void **nextp;
+ uintmax_t *sizep;
+{
+ REGION_MEM *mem;
+
+ if (infop->mem == NULL)
+ return (NULL);
+ if (*nextp == NULL)
+ *nextp = infop->mem;
+ mem = *(REGION_MEM **)nextp;
+ *nextp = mem->next;
+
+ *sizep = __env_elem_size(infop->env, mem);
+ *sizep -= sizeof(*mem);
+
+ return ((void *)(mem + 1));
+}
+
+#ifdef HAVE_STATISTICS
+/*
+ * __env_alloc_print --
+ * Display the lists of memory chunks.
+ *
+ * PUBLIC: void __env_alloc_print __P((REGINFO *, u_int32_t));
+ */
+void
+__env_alloc_print(infop, flags)
+ REGINFO *infop;
+ u_int32_t flags;
+{
+ ALLOC_ELEMENT *elp;
+ ALLOC_LAYOUT *head;
+ ENV *env;
+ u_int i;
+
+ env = infop->env;
+ head = infop->head;
+
+ if (F_ISSET(env, ENV_PRIVATE))
+ return;
+
+ __db_msg(env,
+ "Region allocations: %lu allocations, %lu failures, %lu frees, %lu longest",
+ (u_long)head->success, (u_long)head->failure, (u_long)head->freed,
+ (u_long)head->longest);
+
+ if (!LF_ISSET(DB_STAT_ALL))
+ return;
+
+ __db_msg(env, "%s", "Allocations by power-of-two sizes:");
+ for (i = 0; i < DB_SIZE_Q_COUNT; ++i)
+ __db_msg(env, "%3dKB\t%lu",
+ (1024 << i) / 1024, (u_long)head->pow2_size[i]);
+
+ if (!LF_ISSET(DB_STAT_ALLOC))
+ return;
+ /*
+	 * We don't normally display the list of address/chunk pairs; a few
+	 * thousand lines of output is too voluminous for even DB_STAT_ALL.
+ */
+ __db_msg(env,
+ "Allocation list by address, offset: {chunk length, user length}");
+ SH_TAILQ_FOREACH(elp, &head->addrq, addrq, __alloc_element)
+ __db_msg(env, "\t%#lx, %lu {%lu, %lu}",
+ P_TO_ULONG(elp), (u_long)R_OFFSET(infop, elp),
+ (u_long)elp->len, (u_long)elp->ulen);
+
+ __db_msg(env, "Allocation free list by size: KB {chunk length}");
+ for (i = 0; i < DB_SIZE_Q_COUNT; ++i) {
+ __db_msg(env, "%3dKB", (1024 << i) / 1024);
+ SH_TAILQ_FOREACH(elp, &head->sizeq[i], sizeq, __alloc_element)
+ __db_msg(env,
+ "\t%#lx {%lu}", P_TO_ULONG(elp), (u_long)elp->len);
+ }
+}
+#endif
diff --git a/src/env/env_backup.c b/src/env/env_backup.c
new file mode 100644
index 00000000..9c79dbb4
--- /dev/null
+++ b/src/env/env_backup.c
@@ -0,0 +1,166 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+static int __env_backup_alloc __P((DB_ENV *));
+
+static int
+__env_backup_alloc(dbenv)
+ DB_ENV *dbenv;
+{
+ ENV *env;
+
+ env = dbenv->env;
+ if (env->backup_handle != NULL)
+ return (0);
+ return (__os_calloc(env, 1,
+ sizeof(*env->backup_handle), &env->backup_handle));
+}
+
+/*
+ * __env_get_backup_config --
+ *
+ * PUBLIC: int __env_get_backup_config __P((DB_ENV *,
+ * PUBLIC: DB_BACKUP_CONFIG, u_int32_t*));
+ */
+int
+__env_get_backup_config(dbenv, config, valuep)
+ DB_ENV *dbenv;
+ DB_BACKUP_CONFIG config;
+ u_int32_t *valuep;
+{
+ DB_BACKUP *backup;
+
+ backup = dbenv->env->backup_handle;
+ if (backup == NULL)
+ return (EINVAL);
+
+ switch (config) {
+ case DB_BACKUP_WRITE_DIRECT:
+ *valuep = F_ISSET(backup, BACKUP_WRITE_DIRECT);
+ break;
+
+ case DB_BACKUP_READ_COUNT:
+ *valuep = backup->read_count;
+ break;
+
+ case DB_BACKUP_READ_SLEEP:
+ *valuep = backup->read_sleep;
+ break;
+
+ case DB_BACKUP_SIZE:
+ *valuep = backup->size;
+ break;
+ }
+ return (0);
+}
+
+/*
+ * __env_set_backup_config --
+ *
+ * PUBLIC: int __env_set_backup_config __P((DB_ENV *,
+ * PUBLIC: DB_BACKUP_CONFIG, u_int32_t));
+ */
+int
+__env_set_backup_config(dbenv, config, value)
+ DB_ENV *dbenv;
+ DB_BACKUP_CONFIG config;
+ u_int32_t value;
+{
+ DB_BACKUP *backup;
+ int ret;
+
+ if ((ret = __env_backup_alloc(dbenv)) != 0)
+ return (ret);
+
+ backup = dbenv->env->backup_handle;
+ switch (config) {
+ case DB_BACKUP_WRITE_DIRECT:
+ if (value == 0)
+ F_CLR(backup, BACKUP_WRITE_DIRECT);
+ else
+ F_SET(backup, BACKUP_WRITE_DIRECT);
+ break;
+
+ case DB_BACKUP_READ_COUNT:
+ backup->read_count = value;
+ break;
+
+ case DB_BACKUP_READ_SLEEP:
+ backup->read_sleep = value;
+ break;
+
+ case DB_BACKUP_SIZE:
+ backup->size = value;
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * __env_get_backup_callbacks --
+ *
+ * PUBLIC: int __env_get_backup_callbacks __P((DB_ENV *,
+ * PUBLIC: int (**)(DB_ENV *, const char *, const char *, void **),
+ * PUBLIC: int (**)(DB_ENV *,
+ * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int8_t *, void *),
+ * PUBLIC: int (**)(DB_ENV *, const char *, void *)));
+ */
+int
+__env_get_backup_callbacks(dbenv, openp, writep, closep)
+ DB_ENV *dbenv;
+ int (**openp)(DB_ENV *, const char *, const char *, void **);
+ int (**writep)(DB_ENV *,
+ u_int32_t, u_int32_t, u_int32_t, u_int8_t *, void *);
+ int (**closep)(DB_ENV *, const char *, void *);
+{
+ DB_BACKUP *backup;
+
+ backup = dbenv->env->backup_handle;
+ if (backup == NULL)
+ return (EINVAL);
+
+ *openp = backup->open;
+ *writep = backup->write;
+ *closep = backup->close;
+ return (0);
+}
+
+/*
+ * __env_set_backup_callbacks --
+ *
+ * PUBLIC: int __env_set_backup_callbacks __P((DB_ENV *,
+ * PUBLIC: int (*)(DB_ENV *, const char *, const char *, void **),
+ * PUBLIC: int (*)(DB_ENV *,
+ * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int8_t *, void *),
+ * PUBLIC: int (*)(DB_ENV *, const char *, void *)));
+ */
+int
+__env_set_backup_callbacks(dbenv, open_func, write_func, close_func)
+ DB_ENV *dbenv;
+ int (*open_func)(DB_ENV *, const char *, const char *, void **);
+ int (*write_func)(DB_ENV *,
+ u_int32_t, u_int32_t, u_int32_t, u_int8_t *, void *);
+ int (*close_func)(DB_ENV *, const char *, void *);
+{
+ DB_BACKUP *backup;
+ int ret;
+
+ if ((ret = __env_backup_alloc(dbenv)) != 0)
+ return (ret);
+
+ backup = dbenv->env->backup_handle;
+ backup->open = open_func;
+ backup->write = write_func;
+ backup->close = close_func;
+ return (0);
+}
diff --git a/src/env/env_config.c b/src/env/env_config.c
new file mode 100644
index 00000000..57496909
--- /dev/null
+++ b/src/env/env_config.c
@@ -0,0 +1,737 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc_auto/db_ext.h"
+
+/*
+ * DB_CONFIG lines are processed primarily by interpreting the command
+ * description tables initialized below.
+ *
+ * Most DB_CONFIG commands consist of a single token name followed by one or two
+ * integer or string arguments. These commands are described by entries in the
+ * config_descs[] array.
+ *
+ * The remaining, usually more complex, DB_CONFIG commands are handled by small
+ * code blocks in __config_parse(). Many of those commands need to translate
+ * option names to the integer values needed by the API configuration functions.
+ * Below the __config_descs[] initialization there are many FN array
+ * initializations which provide the mapping between user-specifiable strings
+ * and internally-used integer values. Typically there is one of these mappings
+ * defined for each complex DB_CONFIG command. Use __db_name_to_val()
+ * to translate a string to its integer value.
+ */
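A small hypothetical DB_CONFIG fragment showing both kinds of command (the path and numeric values are invented for illustration):

/*
 *	set_lg_dir /var/dbhome/logs	simple command (CFG_STRING): the rest
 *					of the line is the single argument
 *	set_tx_max 500			simple command (CFG_UINT)
 *	set_cachesize 1 0 2		complex command, handled by its own
 *					code block in __config_parse()
 *	set_flags db_txn_nosync on	complex command; the flag name is
 *					mapped through an FN table below
 *	rep_set_timeout db_rep_ack_timeout 20000
 */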
+typedef enum {
+ CFG_INT, /* The argument is 1 signed integer. */
+ CFG_LONG, /* The argument is 1 signed long int. */
+ CFG_UINT, /* The argument is 1 unsigned integer. */
+ CFG_2INT, /* The arguments are 2 signed integers. */
+ CFG_2UINT, /* The arguments are 2 unsigned integers. */
+ CFG_STRING /* The rest of the line is a string. */
+} __db_config_type;
+
+typedef struct __db_config_desc {
+ char *name; /* The name of a simple DB_CONFIG command. */
+ __db_config_type type; /* The enum describing its argument type(s). */
+ int (*func)(); /* The function to call with the argument(s). */
+} CFG_DESC;
+
+/* These typedefs help eliminate lint warnings where "func" above is used. */
+typedef int (*CFG_FUNC_STRING) __P((DB_ENV *, const char *));
+typedef int (*CFG_FUNC_INT) __P((DB_ENV *, int));
+typedef int (*CFG_FUNC_LONG) __P((DB_ENV *, long));
+typedef int (*CFG_FUNC_UINT) __P((DB_ENV *, u_int32_t));
+typedef int (*CFG_FUNC_2INT) __P((DB_ENV *, int, int));
+typedef int (*CFG_FUNC_2UINT) __P((DB_ENV *, u_int32_t, u_int32_t));
+
+/*
+ * This table lists the simple DB_CONFIG configuration commands. It is sorted by
+ * the command name, so that __config_scan() can bsearch() it. After making an
+ * addition to this table, please be sure that it remains sorted. With vi or
+ * vim, the following command line will do it:
+ * :/^static const CFG_DESC config_descs/+1, /^}/-1 ! sort
+ *
+ * This table can contain aliases. Aliases have different names with identical
+ * types and functions. At this time there are four aliases:
+ * Outdated Name Current Name
+ * db_data_dir set_data_dir
+ * db_log_dir set_lg_dir
+ * db_tmp_dir set_tmp_dir
+ * set_tas_spins mutex_set_tas_spins
+ */
+static const CFG_DESC config_descs[] = {
+ { "add_data_dir", CFG_STRING, __env_add_data_dir },
+ { "db_data_dir", CFG_STRING, __env_set_data_dir },
+ { "db_log_dir", CFG_STRING, __log_set_lg_dir },
+ { "db_tmp_dir", CFG_STRING, __env_set_tmp_dir },
+ { "mutex_set_align", CFG_UINT, __mutex_set_align },
+ { "mutex_set_increment", CFG_UINT, __mutex_set_increment },
+ { "mutex_set_init", CFG_UINT, __mutex_set_init },
+ { "mutex_set_max", CFG_UINT, __mutex_set_max },
+ { "mutex_set_tas_spins", CFG_UINT, __mutex_set_tas_spins },
+ { "rep_set_clockskew", CFG_2UINT, __rep_set_clockskew },
+ { "rep_set_limit", CFG_2UINT, __rep_set_limit },
+ { "rep_set_nsites", CFG_UINT, __rep_set_nsites_pp },
+ { "rep_set_priority", CFG_UINT, __rep_set_priority },
+ { "rep_set_request", CFG_2UINT, __rep_set_request },
+ { "set_cache_max", CFG_2UINT, __memp_set_cache_max },
+ { "set_create_dir", CFG_STRING, __env_set_create_dir },
+ { "set_data_dir", CFG_STRING, __env_set_data_dir },
+ { "set_data_len", CFG_UINT, __env_set_data_len },
+ { "set_intermediate_dir_mode",CFG_STRING, __env_set_intermediate_dir_mode },
+ { "set_lg_bsize", CFG_UINT, __log_set_lg_bsize },
+ { "set_lg_dir", CFG_STRING, __log_set_lg_dir },
+ { "set_lg_filemode", CFG_INT, __log_set_lg_filemode },
+ { "set_lg_max", CFG_UINT, __log_set_lg_max },
+ { "set_lg_regionmax", CFG_UINT, __log_set_lg_regionmax },
+ { "set_lk_max_lockers", CFG_UINT, __lock_set_lk_max_lockers },
+ { "set_lk_max_locks", CFG_UINT, __lock_set_lk_max_locks },
+ { "set_lk_max_objects", CFG_UINT, __lock_set_lk_max_objects },
+ { "set_lk_partitions", CFG_UINT, __lock_set_lk_partitions },
+ { "set_lk_tablesize", CFG_UINT, __lock_set_lk_tablesize },
+ { "set_memory_max", CFG_2UINT, __env_set_memory_max },
+ { "set_metadata_dir", CFG_STRING, __env_set_metadata_dir },
+ { "set_mp_max_openfd", CFG_INT, __memp_set_mp_max_openfd },
+ { "set_mp_max_write", CFG_2INT, __memp_set_mp_max_write },
+ { "set_mp_mmapsize", CFG_UINT, __memp_set_mp_mmapsize },
+ { "set_mp_mtxcount", CFG_UINT, __memp_set_mp_mtxcount },
+ { "set_mp_pagesize", CFG_UINT, __memp_set_mp_pagesize },
+ { "set_shm_key", CFG_LONG, __env_set_shm_key },
+ { "set_tas_spins", CFG_UINT, __mutex_set_tas_spins },
+ { "set_thread_count", CFG_UINT, __env_set_thread_count },
+ { "set_tmp_dir", CFG_STRING, __env_set_tmp_dir },
+ { "set_tx_max", CFG_UINT, __txn_set_tx_max }
+};
+
+/*
+ * Here are the option-name to option-value mappings used by complex commands.
+ */
+static const FN config_mem_init[] = {
+ { (u_int32_t) DB_MEM_LOCK, "DB_MEM_LOCK" },
+ { (u_int32_t) DB_MEM_LOCKER, "DB_MEM_LOCKER" },
+ { (u_int32_t) DB_MEM_LOCKOBJECT, "DB_MEM_LOCKOBJECT" },
+ { (u_int32_t) DB_MEM_TRANSACTION, "DB_MEM_TRANSACTION" },
+ { (u_int32_t) DB_MEM_THREAD, "DB_MEM_THREAD" },
+ { (u_int32_t) DB_MEM_LOGID, "DB_MEM_LOGID" },
+ { 0, NULL }
+};
+
+static const FN config_rep_config[] = {
+ { DB_REP_CONF_AUTOINIT, "db_rep_conf_autoinit" },
+ { DB_REP_CONF_AUTOROLLBACK, "db_rep_conf_autorollback" },
+ { DB_REP_CONF_BULK, "db_rep_conf_bulk" },
+ { DB_REP_CONF_DELAYCLIENT, "db_rep_conf_delayclient" },
+ { DB_REP_CONF_INMEM, "db_rep_conf_inmem" },
+ { DB_REP_CONF_LEASE, "db_rep_conf_lease" },
+ { DB_REP_CONF_NOWAIT, "db_rep_conf_nowait" },
+ { DB_REPMGR_CONF_2SITE_STRICT, "db_repmgr_conf_2site_strict" },
+ { DB_REPMGR_CONF_ELECTIONS, "db_repmgr_conf_elections" },
+ { 0, NULL }
+};
+
+static const FN config_rep_timeout[] = {
+ { DB_REP_ACK_TIMEOUT, "db_rep_ack_timeout" },
+ { DB_REP_CHECKPOINT_DELAY, "db_rep_checkpoint_delay" },
+ { DB_REP_CONNECTION_RETRY, "db_rep_connection_retry" },
+ { DB_REP_ELECTION_TIMEOUT, "db_rep_election_timeout" },
+ { DB_REP_ELECTION_RETRY, "db_rep_election_retry" },
+ { DB_REP_FULL_ELECTION_TIMEOUT, "db_rep_full_election_timeout" },
+ { DB_REP_HEARTBEAT_MONITOR, "db_rep_heartbeat_monitor" },
+ { DB_REP_HEARTBEAT_SEND, "db_rep_heartbeat_send" },
+ { DB_REP_LEASE_TIMEOUT, "db_rep_lease_timeout" },
+ { 0, NULL }
+};
+
+static const FN config_repmgr_ack_policy[] = {
+ { DB_REPMGR_ACKS_ALL, "db_repmgr_acks_all" },
+ { DB_REPMGR_ACKS_ALL_AVAILABLE, "db_repmgr_acks_all_available" },
+ { DB_REPMGR_ACKS_ALL_PEERS, "db_repmgr_acks_all_peers" },
+ { DB_REPMGR_ACKS_NONE, "db_repmgr_acks_none" },
+ { DB_REPMGR_ACKS_ONE, "db_repmgr_acks_one" },
+ { DB_REPMGR_ACKS_ONE_PEER, "db_repmgr_acks_one_peer" },
+ { DB_REPMGR_ACKS_QUORUM, "db_repmgr_acks_quorum" },
+ { 0, NULL }
+};
+
+static const FN config_repmgr_site[] = {
+ { DB_BOOTSTRAP_HELPER, "db_bootstrap_helper" },
+ { DB_GROUP_CREATOR, "db_group_creator" },
+ { DB_LEGACY, "db_legacy" },
+ { DB_LOCAL_SITE, "db_local_site" },
+ { DB_REPMGR_PEER, "db_repmgr_peer" },
+ { 0, NULL }
+};
+
+static const FN config_set_flags[] = {
+ { DB_AUTO_COMMIT, "db_auto_commit" },
+ { DB_CDB_ALLDB, "db_cdb_alldb" },
+ { DB_DIRECT_DB, "db_direct_db" },
+ { DB_DSYNC_DB, "db_dsync_db" },
+ { DB_MULTIVERSION, "db_multiversion" },
+ { DB_NOLOCKING, "db_nolocking" },
+ { DB_NOMMAP, "db_nommap" },
+ { DB_NOPANIC, "db_nopanic" },
+ { DB_OVERWRITE, "db_overwrite" },
+ { DB_REGION_INIT, "db_region_init" },
+ { DB_TIME_NOTGRANTED, "db_time_notgranted" },
+ { DB_TXN_NOSYNC, "db_txn_nosync" },
+ { DB_TXN_NOWAIT, "db_txn_nowait" },
+ { DB_TXN_SNAPSHOT, "db_txn_snapshot" },
+ { DB_TXN_WRITE_NOSYNC, "db_txn_write_nosync" },
+ { DB_YIELDCPU, "db_yieldcpu" },
+ { 0, NULL }
+};
+
+static const FN config_set_flags_forlog[] = {
+ { DB_LOG_DIRECT, "db_direct_log" },
+ { DB_LOG_DSYNC, "db_dsync_log" },
+ { DB_LOG_AUTO_REMOVE, "db_log_autoremove" },
+ { DB_LOG_IN_MEMORY, "db_log_inmemory" },
+ { 0, NULL }
+};
+
+static const FN config_log_set_config[] = {
+ { DB_LOG_DIRECT, "db_log_direct" },
+ { DB_LOG_DSYNC, "db_log_dsync" },
+ { DB_LOG_AUTO_REMOVE, "db_log_auto_remove" },
+ { DB_LOG_IN_MEMORY, "db_log_in_memory" },
+ { DB_LOG_ZERO, "db_log_zero" },
+ { 0, NULL }
+};
+
+static const FN config_set_lk_detect[] = {
+ { DB_LOCK_DEFAULT, "db_lock_default" },
+ { DB_LOCK_EXPIRE, "db_lock_expire" },
+ { DB_LOCK_MAXLOCKS, "db_lock_maxlocks" },
+ { DB_LOCK_MAXWRITE, "db_lock_maxwrite" },
+ { DB_LOCK_MINLOCKS, "db_lock_minlocks" },
+ { DB_LOCK_MINWRITE, "db_lock_minwrite" },
+ { DB_LOCK_OLDEST, "db_lock_oldest" },
+ { DB_LOCK_RANDOM, "db_lock_random" },
+ { DB_LOCK_YOUNGEST, "db_lock_youngest" },
+ { 0, NULL }
+};
+
+static const FN config_set_open_flags[] = {
+ { DB_INIT_REP, "db_init_rep" },
+ { DB_PRIVATE, "db_private" },
+ { DB_REGISTER, "db_register" },
+ { DB_THREAD, "db_thread" },
+ { 0, NULL }
+};
+
+static const FN config_set_verbose[] = {
+ { DB_VERB_BACKUP, "db_verb_backup" },
+ { DB_VERB_DEADLOCK, "db_verb_deadlock" },
+ { DB_VERB_FILEOPS, "db_verb_fileops" },
+ { DB_VERB_FILEOPS_ALL, "db_verb_fileops_all" },
+ { DB_VERB_RECOVERY, "db_verb_recovery" },
+ { DB_VERB_REGISTER, "db_verb_register" },
+ { DB_VERB_REPLICATION, "db_verb_replication" },
+ { DB_VERB_REP_ELECT, "db_verb_rep_elect" },
+ { DB_VERB_REP_LEASE, "db_verb_rep_lease" },
+ { DB_VERB_REP_MISC, "db_verb_rep_misc" },
+ { DB_VERB_REP_MSGS, "db_verb_rep_msgs" },
+ { DB_VERB_REP_SYNC, "db_verb_rep_sync" },
+ { DB_VERB_REP_SYSTEM, "db_verb_rep_system" },
+ { DB_VERB_REP_TEST, "db_verb_rep_test" },
+ { DB_VERB_REPMGR_CONNFAIL, "db_verb_repmgr_connfail" },
+ { DB_VERB_REPMGR_MISC, "db_verb_repmgr_misc" },
+ { DB_VERB_WAITSFOR, "db_verb_waitsfor" },
+ { 0, NULL}
+};
+
+static int __config_parse __P((ENV *, char *, int));
+static int __config_scan __P((char *, char **, const CFG_DESC **));
+static int cmp_cfg_name __P((const void *, const void *element));
+
+/*
+ * __env_read_db_config --
+ * Read the DB_CONFIG file.
+ *
+ * PUBLIC: int __env_read_db_config __P((ENV *));
+ */
+int
+__env_read_db_config(env)
+ ENV *env;
+{
+ FILE *fp;
+ int lc, ret;
+ char *p, buf[256];
+
+ /* Parse the config file. */
+ p = NULL;
+ if ((ret = __db_appname(env,
+ DB_APP_NONE, "DB_CONFIG", NULL, &p)) != 0)
+ return (ret);
+ if (p == NULL)
+ fp = NULL;
+ else {
+ fp = fopen(p, "r");
+ __os_free(env, p);
+ }
+
+ if (fp == NULL)
+ return (0);
+
+ for (lc = 1; fgets(buf, sizeof(buf), fp) != NULL; ++lc) {
+ if ((p = strchr(buf, '\n')) == NULL)
+ p = buf + strlen(buf);
+ if (p > buf && p[-1] == '\r')
+ --p;
+ *p = '\0';
+ for (p = buf; *p != '\0' && isspace((int)*p); ++p)
+ ;
+ if (*p == '\0' || *p == '#')
+ continue;
+
+ if ((ret = __config_parse(env, p, lc)) != 0)
+ break;
+ }
+ (void)fclose(fp);
+
+ return (ret);
+}
+
+#undef CFG_GET_INT
+#define CFG_GET_INT(s, vp) do { \
+ int __ret; \
+ if ((__ret = \
+ __db_getlong(env->dbenv, NULL, s, 0, INT_MAX, vp)) != 0) \
+ return (__ret); \
+} while (0)
+#undef CFG_GET_LONG
+#define CFG_GET_LONG(s, vp) do { \
+ int __ret; \
+ if ((__ret = \
+ __db_getlong(env->dbenv, NULL, s, 0, LONG_MAX, vp)) != 0) \
+ return (__ret); \
+} while (0)
+#undef CFG_GET_UINT
+#define CFG_GET_UINT(s, vp) do { \
+ int __ret; \
+ if ((__ret = \
+ __db_getulong(env->dbenv, NULL, s, 0, UINT_MAX, vp)) != 0) \
+ return (__ret); \
+} while (0)
+#undef CFG_GET_UINT32
+#define CFG_GET_UINT32(s, vp) do { \
+ if (__db_getulong(env->dbenv, NULL, s, 0, UINT32_MAX, vp) != 0) \
+ return (EINVAL); \
+} while (0)
+
+/* This is the maximum number of tokens in a DB_CONFIG line. */
+#undef CFG_SLOTS
+#define CFG_SLOTS 10
+
+/*
+ * __config_parse --
+ * Parse a single NAME VALUE pair.
+ */
+static int
+__config_parse(env, s, lc)
+ ENV *env;
+ char *s;
+ int lc;
+{
+ DB_ENV *dbenv;
+ DB_SITE *site;
+ u_long uv1, uv2;
+ long lv1, lv2;
+ u_int port;
+ int i, nf, onoff, bad, ret, t_ret;
+ char *argv[CFG_SLOTS];
+ const CFG_DESC *desc;
+
+ bad = 0;
+ dbenv = env->dbenv;
+
+ /*
+ * Split the input line in 's' into its argv-like components, returning
+ * the number of fields. If the command is one of the "simple" ones in
+ * config_descs, also return its command descriptor.
+ */
+ if ((nf = __config_scan(s, argv, &desc)) < 2) {
+format: __db_errx(env, DB_STR_A("1584",
+ "line %d: %s: incorrect name-value pair", "%d %s"),
+ lc, argv[0]);
+ return (EINVAL);
+ }
+
+ /* Handle simple configuration lines here. */
+ if (desc != NULL) {
+ ret = 0;
+ switch (desc->type) {
+ case CFG_INT: /* <command> <int> */
+ if (nf != 2)
+ goto format;
+ CFG_GET_INT(argv[1], &lv1);
+ ret = ((CFG_FUNC_INT)desc->func)(dbenv, (int) lv1);
+ break;
+
+ case CFG_LONG: /* <command> <long int> */
+ if (nf != 2)
+ goto format;
+ CFG_GET_LONG(argv[1], &lv1);
+ ret = ((CFG_FUNC_LONG)desc->func)(dbenv, lv1);
+ break;
+
+ case CFG_UINT: /* <command> <uint> */
+ if (nf != 2)
+ goto format;
+ CFG_GET_UINT(argv[1], &uv1);
+ ret = ((CFG_FUNC_UINT)desc->func)
+ (dbenv, (u_int32_t) uv1);
+ break;
+
+ case CFG_2INT: /* <command> <int1> <int2> */
+ if (nf != 3)
+ goto format;
+ CFG_GET_INT(argv[1], &lv1);
+ CFG_GET_INT(argv[2], &lv2);
+ ret = ((CFG_FUNC_2INT)desc->func)
+ (dbenv, (int) lv1, (int) lv2);
+ break;
+
+ case CFG_2UINT: /* <command> <uint1> <uint2> */
+ if (nf != 3)
+ goto format;
+ CFG_GET_UINT(argv[1], &uv1);
+ CFG_GET_UINT(argv[2], &uv2);
+ ret = ((CFG_FUNC_2UINT)desc->func)
+ (dbenv, (u_int32_t) uv1, (u_int32_t) uv2);
+ break;
+
+ case CFG_STRING: /* <command> <rest of line as string> */
+ ret = ((CFG_FUNC_STRING) desc->func)(dbenv, argv[1]);
+ break;
+ }
+ return (ret);
+ }
+
+ /*
+ * The commands not covered in config_descs are handled below, each
+ * with their own command-specific block of code. Most of them are
+ * fairly similar to each other, but not quite enough to warrant
+ * that they all be table-driven too.
+ */
+
+ /* set_memory_init db_mem_XXX <unsigned> */
+ if (strcasecmp(argv[0], "set_memory_init") == 0) {
+ if (nf != 3)
+ goto format;
+ if ((lv1 = __db_name_to_val(config_mem_init, argv[1])) == -1)
+ goto format;
+ CFG_GET_UINT32(argv[2], &uv2);
+ return (__env_set_memory_init(dbenv,
+ (DB_MEM_CONFIG) lv1, (u_int32_t)uv2));
+ }
+
+ /* rep_set_config { db_rep_conf_XXX | db_repmgr_conf_XXX } [on|off] */
+ if (strcasecmp(argv[0], "rep_set_config") == 0) {
+ if (nf != 2 && nf != 3)
+ goto format;
+ onoff = 1;
+ if (nf == 3) {
+ if (strcasecmp(argv[2], "off") == 0)
+ onoff = 0;
+ else if (strcasecmp(argv[2], "on") != 0)
+ goto format;
+ }
+ if ((lv1 = __db_name_to_val(config_rep_config, argv[1])) == -1)
+ goto format;
+ return (__rep_set_config(dbenv, (u_int32_t)lv1, onoff));
+ }
+
+ /* rep_set_timeout db_rep_XXX <unsigned> */
+ if (strcasecmp(argv[0], "rep_set_timeout") == 0) {
+ if (nf != 3)
+ goto format;
+ if ((lv1 = __db_name_to_val(config_rep_timeout, argv[1])) == -1)
+ goto format;
+ CFG_GET_UINT32(argv[2], &uv2);
+ return (__rep_set_timeout(dbenv, lv1, (db_timeout_t)uv2));
+ }
+
+ /* repmgr_set_ack_policy db_repmgr_acks_XXX */
+ if (strcasecmp(argv[0], "repmgr_set_ack_policy") == 0) {
+ if (nf != 2)
+ goto format;
+ if ((lv1 =
+ __db_name_to_val(config_repmgr_ack_policy, argv[1])) == -1)
+ goto format;
+ return (__repmgr_set_ack_policy(dbenv, lv1));
+ }
+
+ /*
+ * Configure name/value pairs of config information for a site (local or
+ * remote).
+ *
+	 * repmgr_site host port [which value (on | off | <unsigned>)] ...
+ */
+ if (strcasecmp(argv[0], "repmgr_site") == 0) {
+ if (nf < 3 || (nf % 2) == 0)
+ goto format;
+ CFG_GET_UINT(argv[2], &uv2);
+ port = (u_int)uv2;
+
+ if ((ret = __repmgr_site(dbenv, argv[1], port, &site, 0)) != 0)
+ return (ret);
+#ifdef HAVE_REPLICATION_THREADS
+ for (i = 3; i < nf; i += 2) {
+ if ((lv1 = __db_name_to_val(
+ config_repmgr_site, argv[i])) == -1) {
+ bad = 1;
+ break;
+ }
+
+ if (strcasecmp(argv[i + 1], "on") == 0)
+ uv2 = 1;
+ else if (strcasecmp(argv[i + 1], "off") == 0)
+ uv2 = 0;
+ else
+ CFG_GET_UINT32(argv[i + 1], &uv2);
+ if ((ret = __repmgr_site_config(site,
+ (u_int32_t)lv1, (u_int32_t)uv2)) != 0)
+ break;
+ }
+ if ((t_ret = __repmgr_site_close(site)) != 0 && ret == 0)
+ ret = t_ret;
+ if (bad)
+ goto format;
+#else
+ /* If repmgr not built, __repmgr_site() returns DB_OPNOTSUP. */
+ COMPQUIET(i, 0);
+ COMPQUIET(t_ret, 0);
+ DB_ASSERT(env, 0);
+#endif
+ return (ret);
+ }
+
+ /* set_cachesize <unsigned gbytes> <unsigned bytes> <int ncaches> */
+ if (strcasecmp(argv[0], "set_cachesize") == 0) {
+ if (nf != 4)
+ goto format;
+ CFG_GET_UINT32(argv[1], &uv1);
+ CFG_GET_UINT32(argv[2], &uv2);
+ CFG_GET_INT(argv[3], &lv1);
+ return (__memp_set_cachesize(
+ dbenv, (u_int32_t)uv1, (u_int32_t)uv2, (int)lv1));
+ }
+
+ /* set_intermediate_dir <integer dir permission> */
+ if (strcasecmp(argv[0], "set_intermediate_dir") == 0) {
+ if (nf != 2)
+ goto format;
+ CFG_GET_INT(argv[1], &lv1);
+ if (lv1 <= 0)
+ goto format;
+ env->dir_mode = (int)lv1;
+ return (0);
+ }
+
+ /* set_flags <env or log flag name> [on | off] */
+ if (strcasecmp(argv[0], "set_flags") == 0) {
+ if (nf != 2 && nf != 3)
+ goto format;
+ onoff = 1;
+ if (nf == 3) {
+ if (strcasecmp(argv[2], "off") == 0)
+ onoff = 0;
+ else if (strcasecmp(argv[2], "on") != 0)
+ goto format;
+ }
+ /* First see whether it is an env flag, then a log flag. */
+ if ((lv1 = __db_name_to_val(config_set_flags, argv[1])) != -1)
+ return (__env_set_flags(dbenv, (u_int32_t)lv1, onoff));
+ else if ((lv1 =
+ __db_name_to_val(config_set_flags_forlog, argv[1])) != -1)
+ return (__log_set_config(dbenv, (u_int32_t)lv1, onoff));
+ goto format;
+ }
+
+ /* log_set_config <log flag name> [on | off] */
+ if (strcasecmp(argv[0], "log_set_config") == 0) {
+ if (nf != 2 && nf != 3)
+ goto format;
+ onoff = 1;
+ if (nf == 3) {
+ if (strcasecmp(argv[2], "off") == 0)
+ onoff = 0;
+ else if (strcasecmp(argv[2], "on") != 0)
+ goto format;
+ }
+ if ((lv1 =
+ __db_name_to_val(config_log_set_config, argv[1])) == -1)
+ goto format;
+ return (__log_set_config(dbenv, (u_int32_t)lv1, onoff));
+ }
+
+ /* set_lk_detect db_lock_xxx */
+ if (strcasecmp(argv[0], "set_lk_detect") == 0) {
+ if (nf != 2)
+ goto format;
+ if ((lv1 =
+ __db_name_to_val(config_set_lk_detect, argv[1])) == -1)
+ goto format;
+ return (__lock_set_lk_detect(dbenv, (u_int32_t)lv1));
+ }
+
+ /* set_lock_timeout <unsigned lock timeout> */
+ if (strcasecmp(argv[0], "set_lock_timeout") == 0) {
+ if (nf != 2)
+ goto format;
+ CFG_GET_UINT32(argv[1], &uv1);
+ return (__lock_set_env_timeout(
+ dbenv, (u_int32_t)uv1, DB_SET_LOCK_TIMEOUT));
+ }
+
+ /* set_open_flags <env open flag name> [on | off] */
+ if (strcasecmp(argv[0], "set_open_flags") == 0) {
+ if (nf != 2 && nf != 3)
+ goto format;
+ onoff = 1;
+ if (nf == 3) {
+ if (strcasecmp(argv[2], "off") == 0)
+ onoff = 0;
+ else if (strcasecmp(argv[2], "on") != 0)
+ goto format;
+ }
+ if ((lv1 =
+ __db_name_to_val(config_set_open_flags, argv[1])) == -1)
+ goto format;
+ if (onoff == 1)
+ FLD_SET(env->open_flags, (u_int32_t)lv1);
+ else
+ FLD_CLR(env->open_flags, (u_int32_t)lv1);
+ return (0);
+ }
+
+ /* set_region_init <0 or 1> */
+ if (strcasecmp(argv[0], "set_region_init") == 0) {
+ if (nf != 2)
+ goto format;
+ CFG_GET_INT(argv[1], &lv1);
+ if (lv1 != 0 && lv1 != 1)
+ goto format;
+ return (__env_set_flags(
+ dbenv, DB_REGION_INIT, lv1 == 0 ? 0 : 1));
+ }
+
+ /* set_reg_timeout <unsigned timeout> */
+ if (strcasecmp(argv[0], "set_reg_timeout") == 0) {
+ if (nf != 2)
+ goto format;
+ CFG_GET_UINT32(argv[1], &uv1);
+ return (__env_set_timeout(
+ dbenv, (u_int32_t)uv1, DB_SET_REG_TIMEOUT));
+ }
+
+ /* set_txn_timeout <unsigned timeout> */
+ if (strcasecmp(argv[0], "set_txn_timeout") == 0) {
+ if (nf != 2)
+ goto format;
+ CFG_GET_UINT32(argv[1], &uv1);
+ return (__lock_set_env_timeout(
+ dbenv, (u_int32_t)uv1, DB_SET_TXN_TIMEOUT));
+ }
+
+ /* set_verbose db_verb_XXX [on | off] */
+ if (strcasecmp(argv[0], "set_verbose") == 0) {
+ if (nf != 2 && nf != 3)
+ goto format;
+ onoff = 1;
+ if (nf == 3) {
+ if (strcasecmp(argv[2], "off") == 0)
+ onoff = 0;
+ else if (strcasecmp(argv[2], "on") != 0)
+ goto format;
+ }
+ if ((lv1 = __db_name_to_val(config_set_verbose, argv[1])) == -1)
+ goto format;
+ return (__env_set_verbose(dbenv, (u_int32_t)lv1, onoff));
+ }
+
+ __db_errx(env,
+ DB_STR_A("1585", "unrecognized name-value pair: %s", "%s"), s);
+ return (EINVAL);
+}
+
+/* cmp_cfg_name --
+ * Bsearch comparison function for CFG_DESC.name, for looking up
+ *	the names of simple commands.
+ */
+static int
+cmp_cfg_name(sought, element)
+ const void *sought;
+ const void *element;
+{
+ return
+ (strcmp((const char *) sought, ((const CFG_DESC *) element)->name));
+}
+
+/*
+ * __config_scan --
+ * Split DB_CONFIG lines into fields. Usually each whitespace separated
+ * field is scanned as a distinct argument. However, if the command is
+ * recognized as one needing a single string value, then the rest of the
+ * line is returned as the one argument. That supports strings which
+ * contain whitespaces, such as some directory paths.
+ *
+ * This returns the number of fields. It sets *descptr to the command
+ * descriptor (if it is recognized), or NULL.
+ */
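Two hypothetical input lines and how they are split (the directory path is invented):

/*
 *	"set_cachesize 1 0 2"
 *		-> 4 fields: "set_cachesize", "1", "0", "2"; *descptr stays
 *		   NULL because set_cachesize is not in config_descs.
 *	"set_data_dir /a/dir with spaces"
 *		-> 2 fields: "set_data_dir" plus the single string
 *		   "/a/dir with spaces", because set_data_dir is CFG_STRING.
 */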
+static int
+__config_scan(input, argv, descptr)
+ char *input, *argv[CFG_SLOTS];
+ const CFG_DESC **descptr;
+{
+ size_t tablecount;
+ int count;
+ char **ap;
+
+ tablecount = sizeof(config_descs) / sizeof(config_descs[0]);
+ *descptr = NULL;
+ for (count = 0, ap = argv; (*ap = strsep(&input, " \t\n")) != NULL;) {
+ /* Empty tokens are adjacent whitespaces; skip them. */
+ if (**ap == '\0')
+ continue;
+ /* Accept a non-empty token as the next field. */
+ count++;
+ ap++;
+ /*
+ * If that was the first token, look it up in the simple command
+ * table. If it is there and takes a single string value, then
+ * return the remainder of the line (after skipping over any
+ * leading whitespaces) without splitting it further.
+ */
+ if (count == 1) {
+ *descptr = bsearch(argv[0], config_descs,
+ tablecount, sizeof(config_descs[0]), cmp_cfg_name);
+ if (*descptr != NULL &&
+ (*descptr)->type == CFG_STRING) {
+ count++;
+ while (isspace(*input))
+ input++;
+ *ap++ = input;
+ break;
+ }
+ }
+ /* Stop scanning if the line has too many tokens. */
+ if (count >= CFG_SLOTS)
+ break;
+ }
+ return (count);
+}
diff --git a/src/env/env_failchk.c b/src/env/env_failchk.c
new file mode 100644
index 00000000..05752f07
--- /dev/null
+++ b/src/env/env_failchk.c
@@ -0,0 +1,558 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2005, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#ifndef HAVE_SIMPLE_THREAD_TYPE
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h" /* Needed for call to __ham_func5. */
+#endif
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+
+static int __env_in_api __P((ENV *));
+static void __env_clear_state __P((ENV *));
+
+/*
+ * __env_failchk_pp --
+ * ENV->failchk pre/post processing.
+ *
+ * PUBLIC: int __env_failchk_pp __P((DB_ENV *, u_int32_t));
+ */
+int
+__env_failchk_pp(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->failchk");
+
+ /*
+ * ENV->failchk requires self and is-alive functions. We
+ * have a default self function, but no is-alive function.
+ */
+ if (!ALIVE_ON(env)) {
+ __db_errx(env, DB_STR("1503",
+ "DB_ENV->failchk requires DB_ENV->is_alive be configured"));
+ return (EINVAL);
+ }
+
+ if (flags != 0)
+ return (__db_ferr(env, "DB_ENV->failchk", 0));
+
+ ENV_ENTER(env, ip);
+ FAILCHK_THREAD(env, ip); /* mark as failchk thread */
+ ret = __env_failchk_int(dbenv);
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+/*
+ * __env_failchk_int --
+ * Process the subsystem failchk routines
+ *
+ * PUBLIC: int __env_failchk_int __P((DB_ENV *));
+ */
+int
+__env_failchk_int(dbenv)
+ DB_ENV *dbenv;
+{
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+ F_SET(dbenv, DB_ENV_FAILCHK);
+
+ /*
+ * We check for dead threads in the API first as this would be likely
+ * to hang other things we try later, like locks and transactions.
+ */
+ if ((ret = __env_in_api(env)) != 0)
+ goto err;
+
+ if (LOCKING_ON(env) && (ret = __lock_failchk(env)) != 0)
+ goto err;
+
+ if (TXN_ON(env) &&
+ ((ret = __txn_failchk(env)) != 0 ||
+ (ret = __dbreg_failchk(env)) != 0))
+ goto err;
+
+ if ((ret = __memp_failchk(env)) != 0)
+ goto err;
+
+#ifdef HAVE_REPLICATION_THREADS
+ if (REP_ON(env) && (ret = __repmgr_failchk(env)) != 0)
+ goto err;
+#endif
+
+ /* Mark any dead blocked threads as dead. */
+ __env_clear_state(env);
+
+#ifdef HAVE_MUTEX_SUPPORT
+ ret = __mut_failchk(env);
+#endif
+
+err: F_CLR(dbenv, DB_ENV_FAILCHK);
+ return (ret);
+}
+
+/*
+ * __env_thread_size --
+ * Initial amount of memory for thread info blocks.
+ * PUBLIC: size_t __env_thread_size __P((ENV *, size_t));
+ */
+size_t
+__env_thread_size(env, other_alloc)
+ ENV *env;
+ size_t other_alloc;
+{
+ DB_ENV *dbenv;
+ size_t size;
+ u_int32_t max;
+
+ dbenv = env->dbenv;
+ size = 0;
+
+ max = dbenv->thr_max;
+ if (dbenv->thr_init != 0) {
+ size =
+ dbenv->thr_init * __env_alloc_size(sizeof(DB_THREAD_INFO));
+ if (max < dbenv->thr_init)
+ max = dbenv->thr_init;
+ } else if (max == 0 && ALIVE_ON(env)) {
+ if ((max = dbenv->tx_init) == 0) {
+ /*
+ * They want thread tracking, but don't say how much.
+ * Arbitrarily assume 1/10 of the remaining memory
+ * or at least 100. We just use this to size
+ * the hash table.
+ */
+ if (dbenv->memory_max != 0)
+ max = (u_int32_t)
+ (((dbenv->memory_max - other_alloc) / 10) /
+ sizeof(DB_THREAD_INFO));
+ if (max < 100)
+ max = 100;
+ }
+ }
+ /*
+ * Set the number of buckets to be 1/8th the number of
+ * thread control blocks. This is rather arbitrary.
+ */
+ dbenv->thr_max = max;
+ if (max != 0)
+ size += __env_alloc_size(sizeof(DB_HASHTAB) *
+ __db_tablesize(max / 8));
+ return (size);
+}
+
+/*
+ * __env_thread_max --
+ * Return the amount of extra memory to hold thread information.
+ * PUBLIC: size_t __env_thread_max __P((ENV *));
+ */
+size_t
+__env_thread_max(env)
+ ENV *env;
+{
+ DB_ENV *dbenv;
+ size_t size;
+
+ dbenv = env->dbenv;
+
+ /*
+ * Allocate space for thread info blocks. Max is only advisory,
+ * so we allocate 25% more.
+ */
+ if (dbenv->thr_max > dbenv->thr_init) {
+ size = dbenv->thr_max - dbenv->thr_init;
+ size += size / 4;
+ } else {
+ dbenv->thr_max = dbenv->thr_init;
+ size = dbenv->thr_init / 4;
+ }
+
+ size = size * __env_alloc_size(sizeof(DB_THREAD_INFO));
+ return (size);
+}
+
+/*
+ * __env_thread_init --
+ * Initialize the thread control block table.
+ *
+ * PUBLIC: int __env_thread_init __P((ENV *, int));
+ */
+int
+__env_thread_init(env, during_creation)
+ ENV *env;
+ int during_creation;
+{
+ DB_ENV *dbenv;
+ DB_HASHTAB *htab;
+ REGENV *renv;
+ REGINFO *infop;
+ THREAD_INFO *thread;
+ int ret;
+
+ dbenv = env->dbenv;
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ if (renv->thread_off == INVALID_ROFF) {
+ if (dbenv->thr_max == 0) {
+ env->thr_hashtab = NULL;
+ if (ALIVE_ON(env)) {
+ __db_errx(env, DB_STR("1504",
+ "is_alive method specified but no thread region allocated"));
+ return (EINVAL);
+ }
+ return (0);
+ }
+
+ if (!during_creation) {
+ __db_errx(env, DB_STR("1505",
+"thread table must be allocated when the database environment is created"));
+ return (EINVAL);
+ }
+
+ if ((ret =
+ __env_alloc(infop, sizeof(THREAD_INFO), &thread)) != 0) {
+ __db_err(env, ret, DB_STR("1506",
+ "unable to allocate a thread status block"));
+ return (ret);
+ }
+ memset(thread, 0, sizeof(*thread));
+ renv->thread_off = R_OFFSET(infop, thread);
+ thread->thr_nbucket = __db_tablesize(dbenv->thr_max / 8);
+ if ((ret = __env_alloc(infop,
+ thread->thr_nbucket * sizeof(DB_HASHTAB), &htab)) != 0)
+ return (ret);
+ thread->thr_hashoff = R_OFFSET(infop, htab);
+ __db_hashinit(htab, thread->thr_nbucket);
+ thread->thr_max = dbenv->thr_max;
+ thread->thr_init = dbenv->thr_init;
+ } else {
+ thread = R_ADDR(infop, renv->thread_off);
+ htab = R_ADDR(infop, thread->thr_hashoff);
+ }
+
+ env->thr_hashtab = htab;
+ env->thr_nbucket = thread->thr_nbucket;
+ dbenv->thr_max = thread->thr_max;
+ dbenv->thr_init = thread->thr_init;
+ return (0);
+}
+
+/*
+ * __env_thread_destroy --
+ * Destroy the thread control block table.
+ *
+ * PUBLIC: void __env_thread_destroy __P((ENV *));
+ */
+void
+__env_thread_destroy(env)
+ ENV *env;
+{
+ DB_HASHTAB *htab;
+ DB_THREAD_INFO *ip, *np;
+ REGENV *renv;
+ REGINFO *infop;
+ THREAD_INFO *thread;
+ u_int32_t i;
+
+ infop = env->reginfo;
+ renv = infop->primary;
+ if (renv->thread_off == INVALID_ROFF)
+ return;
+
+ thread = R_ADDR(infop, renv->thread_off);
+ if ((htab = env->thr_hashtab) != NULL) {
+ for (i = 0; i < env->thr_nbucket; i++) {
+ ip = SH_TAILQ_FIRST(&htab[i], __db_thread_info);
+ for (; ip != NULL; ip = np) {
+ np = SH_TAILQ_NEXT(ip,
+ dbth_links, __db_thread_info);
+ __env_alloc_free(infop, ip);
+ }
+ }
+ __env_alloc_free(infop, htab);
+ }
+
+ __env_alloc_free(infop, thread);
+ return;
+}
+
+/*
+ * __env_in_api --
+ *	Look for threads which died in the API and complain.
+ *	If no thread died in the API but some died while blocked,
+ *	unpin any buffers they may have locked.
+ */
+static int
+__env_in_api(env)
+ ENV *env;
+{
+ DB_ENV *dbenv;
+ DB_HASHTAB *htab;
+ DB_THREAD_INFO *ip;
+ REGENV *renv;
+ REGINFO *infop;
+ THREAD_INFO *thread;
+ u_int32_t i;
+ int unpin, ret;
+
+ if ((htab = env->thr_hashtab) == NULL)
+ return (EINVAL);
+
+ dbenv = env->dbenv;
+ infop = env->reginfo;
+ renv = infop->primary;
+ thread = R_ADDR(infop, renv->thread_off);
+ unpin = 0;
+
+ for (i = 0; i < env->thr_nbucket; i++)
+ SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+ if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE ||
+ (ip->dbth_state == THREAD_OUT &&
+ thread->thr_count < thread->thr_max))
+ continue;
+ if (dbenv->is_alive(
+ dbenv, ip->dbth_pid, ip->dbth_tid, 0))
+ continue;
+ if (ip->dbth_state == THREAD_BLOCKED) {
+ ip->dbth_state = THREAD_BLOCKED_DEAD;
+ unpin = 1;
+ continue;
+ }
+ if (ip->dbth_state == THREAD_OUT) {
+ ip->dbth_state = THREAD_SLOT_NOT_IN_USE;
+ continue;
+ }
+ return (__db_failed(env, DB_STR("1507",
+ "Thread died in Berkeley DB library"),
+ ip->dbth_pid, ip->dbth_tid));
+ }
+
+ if (unpin == 0)
+ return (0);
+
+ for (i = 0; i < env->thr_nbucket; i++)
+ SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info)
+ if (ip->dbth_state == THREAD_BLOCKED_DEAD &&
+ (ret = __memp_unpin_buffers(env, ip)) != 0)
+ return (ret);
+
+ return (0);
+}
+
+/*
+ * __env_clear_state --
+ *	Look for threads which died while blocked and clear them.
+ */
+static void
+__env_clear_state(env)
+ ENV *env;
+{
+ DB_HASHTAB *htab;
+ DB_THREAD_INFO *ip;
+ u_int32_t i;
+
+ htab = env->thr_hashtab;
+ for (i = 0; i < env->thr_nbucket; i++)
+ SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info)
+ if (ip->dbth_state == THREAD_BLOCKED_DEAD)
+ ip->dbth_state = THREAD_SLOT_NOT_IN_USE;
+}
+
+struct __db_threadid {
+ pid_t pid;
+ db_threadid_t tid;
+};
+
+/*
+ * PUBLIC: int __env_set_state __P((ENV *, DB_THREAD_INFO **, DB_THREAD_STATE));
+ */
+int
+__env_set_state(env, ipp, state)
+ ENV *env;
+ DB_THREAD_INFO **ipp;
+ DB_THREAD_STATE state;
+{
+ struct __db_threadid id;
+ DB_ENV *dbenv;
+ DB_HASHTAB *htab;
+ DB_THREAD_INFO *ip;
+ REGENV *renv;
+ REGINFO *infop;
+ THREAD_INFO *thread;
+ u_int32_t indx;
+ int ret;
+
+ dbenv = env->dbenv;
+ htab = env->thr_hashtab;
+
+ if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) {
+ *ipp = NULL;
+ return (0);
+ }
+ dbenv->thread_id(dbenv, &id.pid, &id.tid);
+
+ /*
+	 * Hashing of thread ids.  This is simple but could be replaced with
+	 * a more expensive hash function if needed.
+ */
+#ifdef HAVE_SIMPLE_THREAD_TYPE
+ /*
+ * A thread ID may be a pointer, so explicitly cast to a pointer of
+ * the appropriate size before doing the bitwise XOR.
+ */
+ indx = (u_int32_t)((uintptr_t)id.pid ^ (uintptr_t)id.tid);
+#else
+ indx = __ham_func5(NULL, &id.tid, sizeof(id.tid));
+#endif
+ indx %= env->thr_nbucket;
+ SH_TAILQ_FOREACH(ip, &htab[indx], dbth_links, __db_thread_info) {
+#ifdef HAVE_SIMPLE_THREAD_TYPE
+ if (id.pid == ip->dbth_pid && id.tid == ip->dbth_tid)
+ break;
+#else
+ if (memcmp(&id.pid, &ip->dbth_pid, sizeof(id.pid)) != 0)
+ continue;
+#ifdef HAVE_MUTEX_PTHREADS
+ if (pthread_equal(id.tid, ip->dbth_tid) == 0)
+#else
+ if (memcmp(&id.tid, &ip->dbth_tid, sizeof(id.tid)) != 0)
+#endif
+ continue;
+ break;
+#endif
+ }
+
+ /*
+	 * For THREAD_VERIFY, check that this thread of control has been
+	 * registered; if ipp is not NULL, also return the thread control
+	 * block, or EINVAL if it was not found.
+ */
+ if (state == THREAD_VERIFY) {
+ DB_ASSERT(env, ip != NULL && ip->dbth_state != THREAD_OUT);
+ if (ipp != NULL) {
+ if (ip == NULL) /* The control block wasn't found */
+ return (EINVAL);
+ *ipp = ip;
+ }
+ return (0);
+ }
+
+ *ipp = NULL;
+ ret = 0;
+ if (ip == NULL) {
+ infop = env->reginfo;
+ renv = infop->primary;
+ thread = R_ADDR(infop, renv->thread_off);
+ MUTEX_LOCK(env, renv->mtx_regenv);
+
+ /*
+		 * If we are past the specified max, try to reclaim a slot
+		 * from our queue.  If failchk has marked the slot not in use
+		 * we can take it; otherwise we must call is_alive before
+		 * reusing it.
+ */
+ if (thread->thr_count >= thread->thr_max) {
+ SH_TAILQ_FOREACH(
+ ip, &htab[indx], dbth_links, __db_thread_info)
+ if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE ||
+ (ip->dbth_state == THREAD_OUT &&
+ ALIVE_ON(env) && !dbenv->is_alive(
+ dbenv, ip->dbth_pid, ip->dbth_tid, 0)))
+ break;
+
+ if (ip != NULL) {
+ DB_ASSERT(env, ip->dbth_pincount == 0);
+ goto init;
+ }
+ }
+
+ thread->thr_count++;
+ if ((ret = __env_alloc(infop,
+ sizeof(DB_THREAD_INFO), &ip)) == 0) {
+ memset(ip, 0, sizeof(*ip));
+ /*
+			 * This assumes we can link the element atomically,
+			 * since readers traverse the chain without locking.
+			 * We never use the backpointer, so we only need to
+			 * be able to write an offset atomically.
+ */
+ SH_TAILQ_INSERT_HEAD(
+ &htab[indx], ip, dbth_links, __db_thread_info);
+ ip->dbth_pincount = 0;
+ ip->dbth_pinmax = PINMAX;
+ ip->dbth_pinlist = R_OFFSET(infop, ip->dbth_pinarray);
+
+init: ip->dbth_pid = id.pid;
+ ip->dbth_tid = id.tid;
+ ip->dbth_state = state;
+ SH_TAILQ_INIT(&ip->dbth_xatxn);
+ }
+ MUTEX_UNLOCK(env, renv->mtx_regenv);
+ } else
+ ip->dbth_state = state;
+ *ipp = ip;
+
+ DB_ASSERT(env, ret == 0);
+ if (ret != 0)
+ __db_errx(env, DB_STR("1508",
+ "Unable to allocate thread control block"));
+ return (ret);
+}
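+
+/*
+ * A worked example of the simple hash above (purely hypothetical values,
+ * assuming HAVE_SIMPLE_THREAD_TYPE): with a pid of 4000 (0xfa0), a tid
+ * whose integral value is 0x1537 and env->thr_nbucket == 37,
+ *
+ *	indx = (u_int32_t)(0xfa0 ^ 0x1537);	-- 0x1a97 == 6807
+ *	indx %= 37;				-- 6807 % 37 == 36
+ *
+ * so this thread's control block is linked into, and later looked up from,
+ * the chain rooted at htab[36].
+ */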
+
+/*
+ * __env_thread_id_string --
+ * Convert a thread id to a string.
+ *
+ * PUBLIC: char *__env_thread_id_string
+ * PUBLIC: __P((DB_ENV *, pid_t, db_threadid_t, char *));
+ */
+char *
+__env_thread_id_string(dbenv, pid, tid, buf)
+ DB_ENV *dbenv;
+ pid_t pid;
+ db_threadid_t tid;
+ char *buf;
+{
+#ifdef HAVE_SIMPLE_THREAD_TYPE
+#ifdef UINT64_FMT
+ char fmt[20];
+
+ snprintf(fmt, sizeof(fmt), "%s/%s", UINT64_FMT, UINT64_FMT);
+ snprintf(buf,
+ DB_THREADID_STRLEN, fmt, (u_int64_t)pid, (u_int64_t)(uintptr_t)tid);
+#else
+ snprintf(buf, DB_THREADID_STRLEN, "%lu/%lu", (u_long)pid, (u_long)tid);
+#endif
+#else
+#ifdef UINT64_FMT
+ char fmt[20];
+
+ snprintf(fmt, sizeof(fmt), "%s/TID", UINT64_FMT);
+ snprintf(buf, DB_THREADID_STRLEN, fmt, (u_int64_t)pid);
+#else
+ snprintf(buf, DB_THREADID_STRLEN, "%lu/TID", (u_long)pid);
+#endif
+#endif
+ COMPQUIET(dbenv, NULL);
+ COMPQUIET(*(u_int8_t *)&tid, 0);
+
+ return (buf);
+}
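+
+/*
+ * For example (hypothetical values), on a build with HAVE_SIMPLE_THREAD_TYPE
+ * and UINT64_FMT defined, a pid of 12345 and a tid whose integral value is
+ * 140181 format as the string "12345/140181"; on a build without a simple
+ * thread type the tid cannot be printed portably, so the result is
+ * "12345/TID".
+ */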
diff --git a/src/env/env_file.c b/src/env/env_file.c
new file mode 100644
index 00000000..b102404d
--- /dev/null
+++ b/src/env/env_file.c
@@ -0,0 +1,128 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2002, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __db_file_extend --
+ * Initialize a regular file by writing the last page of the file.
+ *
+ * PUBLIC: int __db_file_extend __P((ENV *, DB_FH *, size_t));
+ */
+int
+__db_file_extend(env, fhp, size)
+ ENV *env;
+ DB_FH *fhp;
+ size_t size;
+{
+ db_pgno_t pages;
+ size_t nw;
+ u_int32_t relative;
+ int ret;
+ char buf;
+
+ buf = '\0';
+ /*
+	 * Extend the file by writing the last page.  If the region is >4GB,
+	 * the offset may be larger than the maximum possible seek "relative"
+	 * argument, as that is an unsigned 32-bit value.  Break the offset
+	 * into pages of 1MB each so we don't overflow -- 2^20 * 2^32 is
+	 * bigger than any memory we expect to see for a while.
+ */
+ pages = (db_pgno_t)((size - sizeof(buf)) / MEGABYTE);
+ relative = (u_int32_t)((size - sizeof(buf)) % MEGABYTE);
+ if ((ret = __os_seek(env, fhp, pages, MEGABYTE, relative)) == 0)
+ ret = __os_write(env, fhp, &buf, sizeof(buf), &nw);
+
+ return (ret);
+}
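+
+/*
+ * A worked example of the split above, using a hypothetical region size of
+ * 4GB + 300 bytes on a system where MEGABYTE is 2^20:
+ *
+ *	size - sizeof(buf) == 4294967595
+ *	pages    == 4294967595 / MEGABYTE == 4096
+ *	relative == 4294967595 % MEGABYTE == 299
+ *
+ * The seek lands at 4096 * MEGABYTE + 299 == 4294967595 and the single
+ * '\0' byte written there extends the file to exactly 4294967596 bytes,
+ * without ever passing a value wider than 32 bits as "relative".
+ */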
+
+/*
+ * __db_file_multi_write --
+ * Overwrite a file with multiple passes to corrupt the data.
+ *
+ * PUBLIC: int __db_file_multi_write __P((ENV *, const char *));
+ */
+int
+__db_file_multi_write(env, path)
+ ENV *env;
+ const char *path;
+{
+ DB_FH *fhp;
+ u_int32_t mbytes, bytes;
+ int ret;
+
+ if ((ret = __os_open(env, path, 0, DB_OSO_REGION, 0, &fhp)) == 0 &&
+ (ret = __os_ioinfo(env, path, fhp, &mbytes, &bytes, NULL)) == 0) {
+ /*
+ * !!!
+		 * Overwrite a regular file with 0xff, 0x00 and 0xff byte
+		 * patterns in turn.  This assumes a fixed-block filesystem;
+		 * journaling or logging filesystems will require operating
+		 * system support.
+ */
+ if ((ret =
+ __db_file_write(env, fhp, mbytes, bytes, 255)) != 0)
+ goto err;
+ if ((ret =
+ __db_file_write(env, fhp, mbytes, bytes, 0)) != 0)
+ goto err;
+ if ((ret =
+ __db_file_write(env, fhp, mbytes, bytes, 255)) != 0)
+ goto err;
+ } else
+ __db_err(env, ret, "%s", path);
+
+err: if (fhp != NULL)
+ (void)__os_closehandle(env, fhp);
+ return (ret);
+}
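+
+/*
+ * Note that each __db_file_write call above makes one complete pass over
+ * the file and ends with an fsync, so a successful __db_file_multi_write
+ * leaves every byte having been written as 0xff, then 0x00, then 0xff
+ * again, with each pattern forced to stable storage before the next pass
+ * begins.
+ */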
+
+/*
+ * __db_file_write --
+ * A single pass over the file, writing the specified byte pattern.
+ *
+ * PUBLIC: int __db_file_write __P((ENV *,
+ * PUBLIC: DB_FH *, u_int32_t, u_int32_t, int));
+ */
+int
+__db_file_write(env, fhp, mbytes, bytes, pattern)
+ ENV *env;
+ DB_FH *fhp;
+ int pattern;
+ u_int32_t mbytes, bytes;
+{
+ size_t len, nw;
+ int i, ret;
+ char *buf;
+
+#undef FILE_WRITE_IO_SIZE
+#define FILE_WRITE_IO_SIZE (64 * 1024)
+ if ((ret = __os_malloc(env, FILE_WRITE_IO_SIZE, &buf)) != 0)
+ return (ret);
+ memset(buf, pattern, FILE_WRITE_IO_SIZE);
+
+ if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
+ goto err;
+ for (; mbytes > 0; --mbytes)
+ for (i = MEGABYTE / FILE_WRITE_IO_SIZE; i > 0; --i)
+ if ((ret = __os_write(
+ env, fhp, buf, FILE_WRITE_IO_SIZE, &nw)) != 0)
+ goto err;
+ for (; bytes > 0; bytes -= (u_int32_t)len) {
+ len = bytes < FILE_WRITE_IO_SIZE ? bytes : FILE_WRITE_IO_SIZE;
+ if ((ret = __os_write(env, fhp, buf, len, &nw)) != 0)
+ goto err;
+ }
+
+ ret = __os_fsync(env, fhp);
+
+err: __os_free(env, buf);
+ return (ret);
+}
diff --git a/src/env/env_globals.c b/src/env/env_globals.c
new file mode 100644
index 00000000..955e6738
--- /dev/null
+++ b/src/env/env_globals.c
@@ -0,0 +1,66 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * A structure with static initialization values for all of the global fields
+ * used by Berkeley DB.
+ * See dbinc/globals.h for the structure definition.
+ */
+DB_GLOBALS __db_global_values = {
+#ifdef HAVE_VXWORKS
+ 0, /* VxWorks: db_global_init */
+ NULL, /* VxWorks: db_global_lock */
+#endif
+#ifdef DB_WIN32
+#ifndef DB_WINCE
+ { 0 }, /* SECURITY_DESCRIPTOR win_default_sec_desc */
+ { 0 }, /* SECURITY_ATTRIBUTES win_default_sec_attr */
+#endif
+ NULL, /* SECURITY_ATTRIBUTES *win_sec_attr */
+#endif
+ { NULL, NULL }, /* XA env list */
+
+ "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=", /* db_line */
+ { 0 }, /* error_buf */
+ 0, /* uid_init */
+ 0, /* rand_next */
+ 0, /* fid_serial */
+ 0, /* db_errno */
+ 0, /* num_active_pids */
+ 0, /* size_active_pids */
+ NULL, /* active_pids */
+ NULL, /* saved_errstr */
+ NULL, /* j_assert */
+ NULL, /* j_close */
+ NULL, /* j_dirfree */
+ NULL, /* j_dirlist */
+ NULL, /* j_exists*/
+ NULL, /* j_free */
+ NULL, /* j_fsync */
+ NULL, /* j_ftruncate */
+ NULL, /* j_ioinfo */
+ NULL, /* j_malloc */
+ NULL, /* j_file_map */
+ NULL, /* j_file_unmap */
+ NULL, /* j_open */
+ NULL, /* j_pread */
+ NULL, /* j_pwrite */
+ NULL, /* j_read */
+ NULL, /* j_realloc */
+ NULL, /* j_region_map */
+ NULL, /* j_region_unmap */
+ NULL, /* j_rename */
+ NULL, /* j_seek */
+ NULL, /* j_unlink */
+ NULL, /* j_write */
+ NULL /* j_yield */
+};
diff --git a/src/env/env_method.c b/src/env/env_method.c
new file mode 100644
index 00000000..63deacea
--- /dev/null
+++ b/src/env/env_method.c
@@ -0,0 +1,1918 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id: env_method.c,v dabaaeb7d839 2010/08/03 17:28:53 mike $
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/hmac.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+
+static int __db_env_init __P((DB_ENV *));
+static void __env_err __P((const DB_ENV *, int, const char *, ...));
+static void __env_errx __P((const DB_ENV *, const char *, ...));
+static int __env_get_create_dir __P((DB_ENV *, const char **));
+static int __env_get_data_dirs __P((DB_ENV *, const char ***));
+static int __env_get_data_len __P((DB_ENV *, u_int32_t *));
+static int __env_get_flags __P((DB_ENV *, u_int32_t *));
+static int __env_get_home __P((DB_ENV *, const char **));
+static int __env_get_intermediate_dir_mode __P((DB_ENV *, const char **));
+static int __env_get_metadata_dir __P((DB_ENV *, const char **));
+static int __env_get_shm_key __P((DB_ENV *, long *));
+static int __env_get_thread_count __P((DB_ENV *, u_int32_t *));
+static int __env_get_thread_id_fn __P((DB_ENV *,
+ void (**)(DB_ENV *, pid_t *, db_threadid_t *)));
+static int __env_get_thread_id_string_fn __P((DB_ENV *,
+ char * (**)(DB_ENV *, pid_t, db_threadid_t, char *)));
+static int __env_get_timeout __P((DB_ENV *, db_timeout_t *, u_int32_t));
+static int __env_get_tmp_dir __P((DB_ENV *, const char **));
+static int __env_get_verbose __P((DB_ENV *, u_int32_t, int *));
+static int __env_get_app_dispatch
+ __P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+static int __env_set_app_dispatch
+ __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+static int __env_set_event_notify
+ __P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *)));
+static int __env_get_feedback __P((DB_ENV *, void (**)(DB_ENV *, int, int)));
+static int __env_set_feedback __P((DB_ENV *, void (*)(DB_ENV *, int, int)));
+static int __env_get_isalive __P((DB_ENV *,
+ int (**)(DB_ENV *, pid_t, db_threadid_t, u_int32_t)));
+static int __env_set_isalive __P((DB_ENV *,
+ int (*)(DB_ENV *, pid_t, db_threadid_t, u_int32_t)));
+static int __env_set_thread_id __P((DB_ENV *, void (*)(DB_ENV *,
+ pid_t *, db_threadid_t *)));
+static int __env_set_thread_id_string __P((DB_ENV *,
+ char * (*)(DB_ENV *, pid_t, db_threadid_t, char *)));
+
+/*
+ * db_env_create --
+ * DB_ENV constructor.
+ *
+ * EXTERN: int db_env_create __P((DB_ENV **, u_int32_t));
+ */
+int
+db_env_create(dbenvpp, flags)
+ DB_ENV **dbenvpp;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ ENV *env;
+ int ret;
+
+ /*
+ * !!!
+ * Our caller has not yet had the opportunity to reset the panic
+ * state or turn off mutex locking, and so we can neither check
+	 * the panic state nor acquire a mutex in the DB_ENV create path.
+ *
+ * !!!
+ * We can't call the flags-checking routines, we don't have an
+ * environment yet.
+ */
+ if (flags != 0)
+ return (EINVAL);
+
+ /* Allocate the DB_ENV and ENV structures -- we always have both. */
+ if ((ret = __os_calloc(NULL, 1, sizeof(DB_ENV), &dbenv)) != 0)
+ return (ret);
+ if ((ret = __os_calloc(NULL, 1, sizeof(ENV), &env)) != 0)
+ goto err;
+ dbenv->env = env;
+ env->dbenv = dbenv;
+
+ if ((ret = __db_env_init(dbenv)) != 0 ||
+ (ret = __lock_env_create(dbenv)) != 0 ||
+ (ret = __log_env_create(dbenv)) != 0 ||
+ (ret = __memp_env_create(dbenv)) != 0 ||
+#ifdef HAVE_REPLICATION
+ (ret = __rep_env_create(dbenv)) != 0 ||
+#endif
+ (ret = __txn_env_create(dbenv)))
+ goto err;
+
+ *dbenvpp = dbenv;
+ return (0);
+
+err: __db_env_destroy(dbenv);
+ return (ret);
+}
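+
+/*
+ * A minimal application-side sketch of using the constructor above; the
+ * home directory "/var/dbhome" is a made-up example and error handling is
+ * trimmed:
+ *
+ *	DB_ENV *dbenv;
+ *	int ret;
+ *
+ *	if ((ret = db_env_create(&dbenv, 0)) != 0)
+ *		return (ret);
+ *	if ((ret = dbenv->open(dbenv, "/var/dbhome", DB_CREATE |
+ *	    DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN, 0)) != 0)
+ *		(void)dbenv->close(dbenv, 0);
+ */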
+
+/*
+ * __db_env_destroy --
+ * DB_ENV destructor.
+ *
+ * PUBLIC: void __db_env_destroy __P((DB_ENV *));
+ */
+void
+__db_env_destroy(dbenv)
+ DB_ENV *dbenv;
+{
+ __lock_env_destroy(dbenv);
+ __log_env_destroy(dbenv);
+ __memp_env_destroy(dbenv);
+#ifdef HAVE_REPLICATION
+ __rep_env_destroy(dbenv);
+#endif
+ __txn_env_destroy(dbenv);
+
+ /*
+ * Discard the underlying ENV structure.
+ *
+ * XXX
+ * This is wrong, but can't be fixed until we finish the work of
+ * splitting up the DB_ENV and ENV structures so that we don't
+ * touch anything in the ENV as part of the above calls to subsystem
+ * DB_ENV cleanup routines.
+ */
+ memset(dbenv->env, CLEAR_BYTE, sizeof(ENV));
+ __os_free(NULL, dbenv->env);
+
+ memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV));
+ __os_free(NULL, dbenv);
+}
+
+/*
+ * __db_env_init --
+ * Initialize a DB_ENV structure.
+ */
+static int
+__db_env_init(dbenv)
+ DB_ENV *dbenv;
+{
+ ENV *env;
+ /*
+ * !!!
+ * Our caller has not yet had the opportunity to reset the panic
+ * state or turn off mutex locking, and so we can neither check
+	 * the panic state nor acquire a mutex in the DB_ENV create path.
+ *
+ * Initialize the method handles.
+ */
+ /* DB_ENV PUBLIC HANDLE LIST BEGIN */
+ dbenv->add_data_dir = __env_add_data_dir;
+ dbenv->backup = __db_backup;
+ dbenv->dbbackup = __db_dbbackup_pp;
+ dbenv->cdsgroup_begin = __cdsgroup_begin_pp;
+ dbenv->close = __env_close_pp;
+ dbenv->dbremove = __env_dbremove_pp;
+ dbenv->dbrename = __env_dbrename_pp;
+ dbenv->err = __env_err;
+ dbenv->errx = __env_errx;
+ dbenv->failchk = __env_failchk_pp;
+ dbenv->fileid_reset = __env_fileid_reset_pp;
+ dbenv->get_alloc = __env_get_alloc;
+ dbenv->get_app_dispatch = __env_get_app_dispatch;
+ dbenv->get_cache_max = __memp_get_cache_max;
+ dbenv->get_cachesize = __memp_get_cachesize;
+ dbenv->get_backup_callbacks = __env_get_backup_callbacks;
+ dbenv->get_backup_config = __env_get_backup_config;
+ dbenv->get_create_dir = __env_get_create_dir;
+ dbenv->get_data_dirs = __env_get_data_dirs;
+ dbenv->get_data_len = __env_get_data_len;
+ dbenv->get_encrypt_flags = __env_get_encrypt_flags;
+ dbenv->get_errcall = __env_get_errcall;
+ dbenv->get_errfile = __env_get_errfile;
+ dbenv->get_errpfx = __env_get_errpfx;
+ dbenv->get_feedback = __env_get_feedback;
+ dbenv->get_flags = __env_get_flags;
+ dbenv->get_home = __env_get_home;
+ dbenv->get_intermediate_dir_mode = __env_get_intermediate_dir_mode;
+ dbenv->get_isalive = __env_get_isalive;
+ dbenv->get_lg_bsize = __log_get_lg_bsize;
+ dbenv->get_lg_dir = __log_get_lg_dir;
+ dbenv->get_lg_filemode = __log_get_lg_filemode;
+ dbenv->get_lg_max = __log_get_lg_max;
+ dbenv->get_lg_regionmax = __log_get_lg_regionmax;
+ dbenv->get_lk_conflicts = __lock_get_lk_conflicts;
+ dbenv->get_lk_detect = __lock_get_lk_detect;
+ dbenv->get_lk_max_lockers = __lock_get_lk_max_lockers;
+ dbenv->get_lk_max_locks = __lock_get_lk_max_locks;
+ dbenv->get_lk_max_objects = __lock_get_lk_max_objects;
+ dbenv->get_lk_partitions = __lock_get_lk_partitions;
+ dbenv->get_lk_priority = __lock_get_lk_priority;
+ dbenv->get_lk_tablesize = __lock_get_lk_tablesize;
+ dbenv->get_memory_init = __env_get_memory_init;
+ dbenv->get_memory_max = __env_get_memory_max;
+ dbenv->get_metadata_dir = __env_get_metadata_dir;
+ dbenv->get_mp_max_openfd = __memp_get_mp_max_openfd;
+ dbenv->get_mp_max_write = __memp_get_mp_max_write;
+ dbenv->get_mp_mmapsize = __memp_get_mp_mmapsize;
+ dbenv->get_mp_mtxcount = __memp_get_mp_mtxcount;
+ dbenv->get_mp_pagesize = __memp_get_mp_pagesize;
+ dbenv->get_mp_tablesize = __memp_get_mp_tablesize;
+ dbenv->get_msgcall = __env_get_msgcall;
+ dbenv->get_msgfile = __env_get_msgfile;
+ dbenv->get_open_flags = __env_get_open_flags;
+ dbenv->get_shm_key = __env_get_shm_key;
+ dbenv->get_thread_count = __env_get_thread_count;
+ dbenv->get_thread_id_fn = __env_get_thread_id_fn;
+ dbenv->get_thread_id_string_fn = __env_get_thread_id_string_fn;
+ dbenv->get_timeout = __env_get_timeout;
+ dbenv->get_tmp_dir = __env_get_tmp_dir;
+ dbenv->get_tx_max = __txn_get_tx_max;
+ dbenv->get_tx_timestamp = __txn_get_tx_timestamp;
+ dbenv->get_verbose = __env_get_verbose;
+ dbenv->is_bigendian = __db_isbigendian;
+ dbenv->lock_detect = __lock_detect_pp;
+ dbenv->lock_get = __lock_get_pp;
+ dbenv->lock_id = __lock_id_pp;
+ dbenv->lock_id_free = __lock_id_free_pp;
+ dbenv->lock_put = __lock_put_pp;
+ dbenv->lock_stat = __lock_stat_pp;
+ dbenv->lock_stat_print = __lock_stat_print_pp;
+ dbenv->lock_vec = __lock_vec_pp;
+ dbenv->log_archive = __log_archive_pp;
+ dbenv->log_cursor = __log_cursor_pp;
+ dbenv->log_file = __log_file_pp;
+ dbenv->log_flush = __log_flush_pp;
+ dbenv->log_get_config = __log_get_config;
+ dbenv->log_printf = __log_printf_capi;
+ dbenv->log_put = __log_put_pp;
+ dbenv->log_put_record = __log_put_record_pp;
+ dbenv->log_read_record = __log_read_record_pp;
+ dbenv->log_set_config = __log_set_config;
+ dbenv->log_stat = __log_stat_pp;
+ dbenv->log_stat_print = __log_stat_print_pp;
+ dbenv->log_verify = __log_verify_pp;
+ dbenv->lsn_reset = __env_lsn_reset_pp;
+ dbenv->memp_fcreate = __memp_fcreate_pp;
+ dbenv->memp_register = __memp_register_pp;
+ dbenv->memp_stat = __memp_stat_pp;
+ dbenv->memp_stat_print = __memp_stat_print_pp;
+ dbenv->memp_sync = __memp_sync_pp;
+ dbenv->memp_trickle = __memp_trickle_pp;
+ dbenv->mutex_alloc = __mutex_alloc_pp;
+ dbenv->mutex_free = __mutex_free_pp;
+ dbenv->mutex_get_align = __mutex_get_align;
+ dbenv->mutex_get_increment = __mutex_get_increment;
+ dbenv->mutex_get_init = __mutex_get_init;
+ dbenv->mutex_get_max = __mutex_get_max;
+ dbenv->mutex_get_tas_spins = __mutex_get_tas_spins;
+ dbenv->mutex_lock = __mutex_lock_pp;
+ dbenv->mutex_set_align = __mutex_set_align;
+ dbenv->mutex_set_increment = __mutex_set_increment;
+ dbenv->mutex_set_init = __mutex_set_init;
+ dbenv->mutex_set_max = __mutex_set_max;
+ dbenv->mutex_set_tas_spins = __mutex_set_tas_spins;
+ dbenv->mutex_stat = __mutex_stat_pp;
+ dbenv->mutex_stat_print = __mutex_stat_print_pp;
+ dbenv->mutex_unlock = __mutex_unlock_pp;
+ dbenv->open = __env_open_pp;
+ dbenv->remove = __env_remove;
+ dbenv->rep_elect = __rep_elect_pp;
+ dbenv->rep_flush = __rep_flush;
+ dbenv->rep_get_clockskew = __rep_get_clockskew;
+ dbenv->rep_get_config = __rep_get_config;
+ dbenv->rep_get_limit = __rep_get_limit;
+ dbenv->rep_get_nsites = __rep_get_nsites;
+ dbenv->rep_get_priority = __rep_get_priority;
+ dbenv->rep_get_request = __rep_get_request;
+ dbenv->rep_get_timeout = __rep_get_timeout;
+ dbenv->rep_process_message = __rep_process_message_pp;
+ dbenv->rep_set_clockskew = __rep_set_clockskew;
+ dbenv->rep_set_config = __rep_set_config;
+ dbenv->rep_set_limit = __rep_set_limit;
+ dbenv->rep_set_nsites = __rep_set_nsites_pp;
+ dbenv->rep_set_priority = __rep_set_priority;
+ dbenv->rep_set_request = __rep_set_request;
+ dbenv->rep_set_timeout = __rep_set_timeout;
+ dbenv->rep_set_transport = __rep_set_transport_pp;
+ dbenv->rep_start = __rep_start_pp;
+ dbenv->rep_stat = __rep_stat_pp;
+ dbenv->rep_stat_print = __rep_stat_print_pp;
+ dbenv->rep_sync = __rep_sync;
+ dbenv->repmgr_channel = __repmgr_channel;
+ dbenv->repmgr_get_ack_policy = __repmgr_get_ack_policy;
+ dbenv->repmgr_local_site = __repmgr_local_site;
+ dbenv->repmgr_msg_dispatch = __repmgr_set_msg_dispatch;
+ dbenv->repmgr_set_ack_policy = __repmgr_set_ack_policy;
+ dbenv->repmgr_site = __repmgr_site;
+ dbenv->repmgr_site_by_eid = __repmgr_site_by_eid;
+ dbenv->repmgr_site_list = __repmgr_site_list;
+ dbenv->repmgr_start = __repmgr_start;
+ dbenv->repmgr_stat = __repmgr_stat_pp;
+ dbenv->repmgr_stat_print = __repmgr_stat_print_pp;
+ dbenv->set_alloc = __env_set_alloc;
+ dbenv->set_app_dispatch = __env_set_app_dispatch;
+ dbenv->set_backup_callbacks = __env_set_backup_callbacks;
+ dbenv->set_backup_config = __env_set_backup_config;
+ dbenv->set_cache_max = __memp_set_cache_max;
+ dbenv->set_cachesize = __memp_set_cachesize;
+ dbenv->set_create_dir = __env_set_create_dir;
+ dbenv->set_data_dir = __env_set_data_dir;
+ dbenv->set_data_len = __env_set_data_len;
+ dbenv->set_encrypt = __env_set_encrypt;
+ dbenv->set_errcall = __env_set_errcall;
+ dbenv->set_errfile = __env_set_errfile;
+ dbenv->set_errpfx = __env_set_errpfx;
+ dbenv->set_event_notify = __env_set_event_notify;
+ dbenv->set_feedback = __env_set_feedback;
+ dbenv->set_flags = __env_set_flags;
+ dbenv->set_intermediate_dir_mode = __env_set_intermediate_dir_mode;
+ dbenv->set_isalive = __env_set_isalive;
+ dbenv->set_lg_bsize = __log_set_lg_bsize;
+ dbenv->set_lg_dir = __log_set_lg_dir;
+ dbenv->set_lg_filemode = __log_set_lg_filemode;
+ dbenv->set_lg_max = __log_set_lg_max;
+ dbenv->set_lg_regionmax = __log_set_lg_regionmax;
+ dbenv->set_lk_conflicts = __lock_set_lk_conflicts;
+ dbenv->set_lk_detect = __lock_set_lk_detect;
+ dbenv->set_lk_max_lockers = __lock_set_lk_max_lockers;
+ dbenv->set_lk_max_locks = __lock_set_lk_max_locks;
+ dbenv->set_lk_max_objects = __lock_set_lk_max_objects;
+ dbenv->set_lk_partitions = __lock_set_lk_partitions;
+ dbenv->set_lk_priority = __lock_set_lk_priority;
+ dbenv->set_lk_tablesize = __lock_set_lk_tablesize;
+ dbenv->set_memory_init = __env_set_memory_init;
+ dbenv->set_memory_max = __env_set_memory_max;
+ dbenv->set_metadata_dir = __env_set_metadata_dir;
+ dbenv->set_mp_max_openfd = __memp_set_mp_max_openfd;
+ dbenv->set_mp_max_write = __memp_set_mp_max_write;
+ dbenv->set_mp_mmapsize = __memp_set_mp_mmapsize;
+ dbenv->set_mp_mtxcount = __memp_set_mp_mtxcount;
+ dbenv->set_mp_pagesize = __memp_set_mp_pagesize;
+ dbenv->set_mp_tablesize = __memp_set_mp_tablesize;
+ dbenv->set_msgcall = __env_set_msgcall;
+ dbenv->set_msgfile = __env_set_msgfile;
+ dbenv->set_paniccall = __env_set_paniccall;
+ dbenv->set_shm_key = __env_set_shm_key;
+ dbenv->set_thread_count = __env_set_thread_count;
+ dbenv->set_thread_id = __env_set_thread_id;
+ dbenv->set_thread_id_string = __env_set_thread_id_string;
+ dbenv->set_timeout = __env_set_timeout;
+ dbenv->set_tmp_dir = __env_set_tmp_dir;
+ dbenv->set_tx_max = __txn_set_tx_max;
+ dbenv->set_tx_timestamp = __txn_set_tx_timestamp;
+ dbenv->set_verbose = __env_set_verbose;
+ dbenv->stat_print = __env_stat_print_pp;
+ dbenv->txn_applied = __txn_applied_pp;
+ dbenv->txn_begin = __txn_begin_pp;
+ dbenv->txn_checkpoint = __txn_checkpoint_pp;
+ dbenv->txn_recover = __txn_recover_pp;
+ dbenv->txn_stat = __txn_stat_pp;
+ dbenv->txn_stat_print = __txn_stat_print_pp;
+ /* DB_ENV PUBLIC HANDLE LIST END */
+
+ /* DB_ENV PRIVATE HANDLE LIST BEGIN */
+ dbenv->prdbt = __db_prdbt;
+ /* DB_ENV PRIVATE HANDLE LIST END */
+
+ dbenv->shm_key = INVALID_REGION_SEGID;
+ dbenv->thread_id = __os_id;
+ dbenv->thread_id_string = __env_thread_id_string;
+
+ env = dbenv->env;
+ __os_id(NULL, &env->pid_cache, NULL);
+
+ env->db_ref = 0;
+ env->log_verify_wrap = __log_verify_wrap;
+ env->data_len = ENV_DEF_DATA_LEN;
+ TAILQ_INIT(&env->fdlist);
+
+ if (!__db_isbigendian())
+ F_SET(env, ENV_LITTLEENDIAN);
+ F_SET(env, ENV_NO_OUTPUT_SET);
+
+ return (0);
+}
+
+/*
+ * __env_err --
+ * DbEnv.err method.
+ */
+static void
+#ifdef STDC_HEADERS
+__env_err(const DB_ENV *dbenv, int error, const char *fmt, ...)
+#else
+__env_err(dbenv, error, fmt, va_alist)
+ const DB_ENV *dbenv;
+ int error;
+ const char *fmt;
+ va_dcl
+#endif
+{
+ /* Message with error string, to stderr by default. */
+ DB_REAL_ERR(dbenv, error, DB_ERROR_SET, 1, fmt);
+}
+
+/*
+ * __env_errx --
+ * DbEnv.errx method.
+ */
+static void
+#ifdef STDC_HEADERS
+__env_errx(const DB_ENV *dbenv, const char *fmt, ...)
+#else
+__env_errx(dbenv, fmt, va_alist)
+ const DB_ENV *dbenv;
+ const char *fmt;
+ va_dcl
+#endif
+{
+ /* Message without error string, to stderr by default. */
+ DB_REAL_ERR(dbenv, 0, DB_ERROR_NOT_SET, 1, fmt);
+}
+
+static int
+__env_get_home(dbenv, homep)
+ DB_ENV *dbenv;
+ const char **homep;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->get_home");
+ *homep = env->db_home;
+
+ return (0);
+}
+
+/*
+ * __env_get_alloc --
+ * {DB_ENV,DB}->get_alloc.
+ *
+ * PUBLIC: int __env_get_alloc __P((DB_ENV *, void *(**)(size_t),
+ * PUBLIC: void *(**)(void *, size_t), void (**)(void *)));
+ */
+int
+__env_get_alloc(dbenv, mal_funcp, real_funcp, free_funcp)
+ DB_ENV *dbenv;
+ void *(**mal_funcp) __P((size_t));
+ void *(**real_funcp) __P((void *, size_t));
+ void (**free_funcp) __P((void *));
+{
+
+ if (mal_funcp != NULL)
+ *mal_funcp = dbenv->db_malloc;
+ if (real_funcp != NULL)
+ *real_funcp = dbenv->db_realloc;
+ if (free_funcp != NULL)
+ *free_funcp = dbenv->db_free;
+ return (0);
+}
+
+/*
+ * __env_set_alloc --
+ * {DB_ENV,DB}->set_alloc.
+ *
+ * PUBLIC: int __env_set_alloc __P((DB_ENV *, void *(*)(size_t),
+ * PUBLIC: void *(*)(void *, size_t), void (*)(void *)));
+ */
+int
+__env_set_alloc(dbenv, mal_func, real_func, free_func)
+ DB_ENV *dbenv;
+ void *(*mal_func) __P((size_t));
+ void *(*real_func) __P((void *, size_t));
+ void (*free_func) __P((void *));
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_alloc");
+
+ dbenv->db_malloc = mal_func;
+ dbenv->db_realloc = real_func;
+ dbenv->db_free = free_func;
+ return (0);
+}
+/*
+ * __env_get_memory_init --
+ * DB_ENV->get_memory_init.
+ *
+ * PUBLIC: int __env_get_memory_init __P((DB_ENV *,
+ * PUBLIC: DB_MEM_CONFIG, u_int32_t *));
+ */
+int
+__env_get_memory_init(dbenv, type, countp)
+ DB_ENV *dbenv;
+ DB_MEM_CONFIG type;
+ u_int32_t *countp;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ switch (type) {
+ case DB_MEM_LOCK:
+ ENV_NOT_CONFIGURED(env,
+ env->lk_handle, "DB_ENV->get_memory_init", DB_INIT_LOCK);
+ if (LOCKING_ON(env))
+ *countp = ((DB_LOCKREGION *)
+ env->lk_handle->reginfo.primary)->stat.st_initlocks;
+ else
+ *countp = dbenv->lk_init;
+ break;
+ case DB_MEM_LOCKOBJECT:
+ ENV_NOT_CONFIGURED(env,
+ env->lk_handle, "DB_ENV->get_memory_init", DB_INIT_LOCK);
+ if (LOCKING_ON(env))
+ *countp = ((DB_LOCKREGION *) env->
+ lk_handle->reginfo.primary)->stat.st_initobjects;
+ else
+ *countp = dbenv->lk_init_objects;
+ break;
+ case DB_MEM_LOCKER:
+ ENV_NOT_CONFIGURED(env,
+ env->lk_handle, "DB_ENV->get_memory_init", DB_INIT_LOCK);
+ if (LOCKING_ON(env))
+ *countp = ((DB_LOCKREGION *) env->
+ lk_handle->reginfo.primary)->stat.st_initlockers;
+ else
+ *countp = dbenv->lk_init_lockers;
+ break;
+ case DB_MEM_LOGID:
+ ENV_NOT_CONFIGURED(env,
+ env->lg_handle, "DB_ENV->get_memory_init", DB_INIT_LOG);
+
+ if (LOGGING_ON(env))
+ *countp = ((LOG *)env->lg_handle->
+ reginfo.primary)->stat.st_fileid_init;
+ else
+ *countp = dbenv->lg_fileid_init;
+ break;
+ case DB_MEM_TRANSACTION:
+ ENV_NOT_CONFIGURED(env,
+		    env->tx_handle, "DB_ENV->get_memory_init", DB_INIT_TXN);
+
+ if (TXN_ON(env))
+ *countp = ((DB_TXNREGION *)
+ env->tx_handle->reginfo.primary)->inittxns;
+ else
+ *countp = dbenv->tx_init;
+ break;
+ case DB_MEM_THREAD:
+ /* We always update thr_init when joining an env. */
+ *countp = dbenv->thr_init;
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * __env_set_memory_init --
+ * DB_ENV->set_memory_init.
+ *
+ * PUBLIC: int __env_set_memory_init __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t));
+ */
+int
+__env_set_memory_init(dbenv, type, count)
+ DB_ENV *dbenv;
+ DB_MEM_CONFIG type;
+ u_int32_t count;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_memory_init");
+ switch (type) {
+ case DB_MEM_LOCK:
+ dbenv->lk_init = count;
+ break;
+ case DB_MEM_LOCKOBJECT:
+ dbenv->lk_init_objects = count;
+ break;
+ case DB_MEM_LOCKER:
+ dbenv->lk_init_lockers = count;
+ break;
+ case DB_MEM_LOGID:
+ dbenv->lg_fileid_init = count;
+ break;
+ case DB_MEM_TRANSACTION:
+ dbenv->tx_init = count;
+ break;
+ case DB_MEM_THREAD:
+ dbenv->thr_init = count;
+ break;
+ }
+
+ return (0);
+}
+/*
+ * __env_get_memory_max --
+ * DB_ENV->get_memory_max.
+ *
+ * PUBLIC: int __env_get_memory_max __P((DB_ENV *, u_int32_t *, u_int32_t *));
+ */
+int
+__env_get_memory_max(dbenv, gbytes, bytes)
+ DB_ENV *dbenv;
+ u_int32_t *gbytes, *bytes;
+{
+ ENV *env;
+ env = dbenv->env;
+
+ if (F_ISSET(env, ENV_OPEN_CALLED)) {
+ *gbytes = (u_int32_t)(env->reginfo->rp->max / GIGABYTE);
+ *bytes = (u_int32_t)(env->reginfo->rp->max % GIGABYTE);
+ } else {
+ *gbytes = (u_int32_t)(dbenv->memory_max / GIGABYTE);
+ *bytes = (u_int32_t)(dbenv->memory_max % GIGABYTE);
+ }
+ return (0);
+}
+
+/*
+ * __env_set_memory_max --
+ * DB_ENV->set_memory_max.
+ *
+ * PUBLIC: int __env_set_memory_max __P((DB_ENV *, u_int32_t, u_int32_t));
+ */
+int
+__env_set_memory_max(dbenv, gbytes, bytes)
+ DB_ENV *dbenv;
+ u_int32_t gbytes, bytes;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_memory_max");
+
+ /*
+	 * If they are asking for 4GB exactly on a 32-bit platform, they
+	 * really meant 4GB - 1.  Give it to them.
+ */
+ if (sizeof(roff_t) == 4 && gbytes == 4 && bytes == 0) {
+ --gbytes;
+ bytes = GIGABYTE - 1;
+ }
+ /*
+ * Make sure they wouldn't overflow the memory_max field on a
+	 * 32-bit architecture.
+ */
+ if (sizeof(roff_t) == 4 && gbytes >= 4) {
+ __db_errx(env, DB_STR("1588",
+ "Maximum memory size too large: maximum is 4GB"));
+ return (EINVAL);
+ }
+ dbenv->memory_max = ((roff_t)gbytes * GIGABYTE) + bytes;
+ return (0);
+}
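+
+/*
+ * For example, a hypothetical set_memory_max(dbenv, 2, 524288) call stores
+ * 2 * GIGABYTE + 524288 == 2148007936 in dbenv->memory_max, and a later
+ * get_memory_max recovers gbytes == 2, bytes == 524288.  On a 32-bit build
+ * (sizeof(roff_t) == 4), a request for exactly (4, 0) is quietly converted
+ * to (3, GIGABYTE - 1), i.e. 4GB - 1, the largest representable value.
+ */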
+
+/*
+ * __env_get_app_dispatch --
+ * Get the transaction abort recover function.
+ */
+static int
+__env_get_app_dispatch(dbenv, app_dispatchp)
+ DB_ENV *dbenv;
+ int (**app_dispatchp) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
+{
+
+ if (app_dispatchp != NULL)
+ *app_dispatchp = dbenv->app_dispatch;
+ return (0);
+}
+
+/*
+ * __env_set_app_dispatch --
+ * Set the transaction abort recover function.
+ */
+static int
+__env_set_app_dispatch(dbenv, app_dispatch)
+ DB_ENV *dbenv;
+ int (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_app_dispatch");
+
+ dbenv->app_dispatch = app_dispatch;
+ return (0);
+}
+
+/*
+ * __env_get_encrypt_flags --
+ * {DB_ENV,DB}->get_encrypt_flags.
+ *
+ * PUBLIC: int __env_get_encrypt_flags __P((DB_ENV *, u_int32_t *));
+ */
+int
+__env_get_encrypt_flags(dbenv, flagsp)
+ DB_ENV *dbenv;
+ u_int32_t *flagsp;
+{
+#ifdef HAVE_CRYPTO
+ DB_CIPHER *db_cipher;
+#endif
+ ENV *env;
+
+ env = dbenv->env;
+
+#ifdef HAVE_CRYPTO
+ db_cipher = env->crypto_handle;
+ if (db_cipher != NULL && db_cipher->alg == CIPHER_AES)
+ *flagsp = DB_ENCRYPT_AES;
+ else
+ *flagsp = 0;
+ return (0);
+#else
+ COMPQUIET(flagsp, 0);
+ __db_errx(env, DB_STR("1555",
+ "library build did not include support for cryptography"));
+ return (DB_OPNOTSUP);
+#endif
+}
+
+/*
+ * __env_set_encrypt --
+ * DB_ENV->set_encrypt.
+ *
+ * PUBLIC: int __env_set_encrypt __P((DB_ENV *, const char *, u_int32_t));
+ */
+int
+__env_set_encrypt(dbenv, passwd, flags)
+ DB_ENV *dbenv;
+ const char *passwd;
+ u_int32_t flags;
+{
+#ifdef HAVE_CRYPTO
+ DB_THREAD_INFO *ip;
+ DB_CIPHER *db_cipher;
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_encrypt");
+#define OK_CRYPTO_FLAGS (DB_ENCRYPT_AES)
+
+ if (flags != 0 && LF_ISSET(~OK_CRYPTO_FLAGS))
+ return (__db_ferr(env, "DB_ENV->set_encrypt", 0));
+
+ if (passwd == NULL || strlen(passwd) == 0) {
+ __db_errx(env, DB_STR("1556",
+ "Empty password specified to set_encrypt"));
+ return (EINVAL);
+ }
+ ENV_ENTER(env, ip);
+ if (!CRYPTO_ON(env)) {
+ if ((ret = __os_calloc(env, 1, sizeof(DB_CIPHER), &db_cipher))
+ != 0)
+ goto err;
+ env->crypto_handle = db_cipher;
+ } else
+ db_cipher = env->crypto_handle;
+
+ if (dbenv->passwd != NULL)
+ __os_free(env, dbenv->passwd);
+ if ((ret = __os_strdup(env, passwd, &dbenv->passwd)) != 0) {
+ __os_free(env, db_cipher);
+ goto err;
+ }
+ /*
+	 * We're going to need this often enough to keep it around.
+ */
+ dbenv->passwd_len = strlen(dbenv->passwd) + 1;
+ /*
+ * The MAC key is for checksumming, and is separate from
+ * the algorithm. So initialize it here, even if they
+ * are using CIPHER_ANY.
+ */
+ __db_derive_mac(
+ (u_int8_t *)dbenv->passwd, dbenv->passwd_len, db_cipher->mac_key);
+ switch (flags) {
+ case 0:
+ F_SET(db_cipher, CIPHER_ANY);
+ break;
+ case DB_ENCRYPT_AES:
+ if ((ret =
+ __crypto_algsetup(env, db_cipher, CIPHER_AES, 0)) != 0)
+ goto err1;
+ break;
+ default: /* Impossible. */
+ break;
+ }
+ ENV_LEAVE(env, ip);
+ return (0);
+
+err1:
+ __os_free(env, dbenv->passwd);
+ __os_free(env, db_cipher);
+ env->crypto_handle = NULL;
+err:
+ ENV_LEAVE(env, ip);
+ return (ret);
+#else
+ COMPQUIET(passwd, NULL);
+ COMPQUIET(flags, 0);
+
+ __db_errx(dbenv->env, DB_STR("1557",
+ "library build did not include support for cryptography"));
+ return (DB_OPNOTSUP);
+#endif
+}
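+
+/*
+ * Typical application-side usage of the method above is a single call made
+ * before DB_ENV->open (a sketch; the passphrase is a placeholder):
+ *
+ *	if ((ret = dbenv->set_encrypt(dbenv,
+ *	    "my passphrase", DB_ENCRYPT_AES)) != 0)
+ *		return (ret);
+ *
+ * Passing flags of 0 leaves the cipher as CIPHER_ANY, deferring the choice
+ * of algorithm; an empty password is rejected with EINVAL.
+ */
+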
+#ifndef HAVE_BREW
+static
+#endif
+const FLAG_MAP EnvMap[] = {
+ { DB_AUTO_COMMIT, DB_ENV_AUTO_COMMIT },
+ { DB_CDB_ALLDB, DB_ENV_CDB_ALLDB },
+ { DB_DATABASE_LOCKING, DB_ENV_DATABASE_LOCKING },
+ { DB_DIRECT_DB, DB_ENV_DIRECT_DB },
+ { DB_DSYNC_DB, DB_ENV_DSYNC_DB },
+ { DB_HOTBACKUP_IN_PROGRESS, DB_ENV_HOTBACKUP },
+ { DB_MULTIVERSION, DB_ENV_MULTIVERSION },
+ { DB_NOFLUSH, DB_ENV_NOFLUSH },
+ { DB_NOLOCKING, DB_ENV_NOLOCKING },
+ { DB_NOMMAP, DB_ENV_NOMMAP },
+ { DB_NOPANIC, DB_ENV_NOPANIC },
+ { DB_OVERWRITE, DB_ENV_OVERWRITE },
+ { DB_REGION_INIT, DB_ENV_REGION_INIT },
+ { DB_TIME_NOTGRANTED, DB_ENV_TIME_NOTGRANTED },
+ { DB_TXN_NOSYNC, DB_ENV_TXN_NOSYNC },
+ { DB_TXN_NOWAIT, DB_ENV_TXN_NOWAIT },
+ { DB_TXN_SNAPSHOT, DB_ENV_TXN_SNAPSHOT },
+ { DB_TXN_WRITE_NOSYNC, DB_ENV_TXN_WRITE_NOSYNC },
+ { DB_YIELDCPU, DB_ENV_YIELDCPU }
+};
+
+/*
+ * __env_map_flags -- map from external to internal flags.
+ * PUBLIC: void __env_map_flags __P((const FLAG_MAP *,
+ * PUBLIC: u_int, u_int32_t *, u_int32_t *));
+ */
+void
+__env_map_flags(flagmap, mapsize, inflagsp, outflagsp)
+ const FLAG_MAP *flagmap;
+ u_int mapsize;
+ u_int32_t *inflagsp, *outflagsp;
+{
+
+ const FLAG_MAP *fmp;
+ u_int i;
+
+ for (i = 0, fmp = flagmap;
+ i < mapsize / sizeof(flagmap[0]); ++i, ++fmp)
+ if (FLD_ISSET(*inflagsp, fmp->inflag)) {
+ FLD_SET(*outflagsp, fmp->outflag);
+ FLD_CLR(*inflagsp, fmp->inflag);
+ if (*inflagsp == 0)
+ break;
+ }
+}
+
+/*
+ * __env_fetch_flags -- map from internal to external flags.
+ * PUBLIC: void __env_fetch_flags __P((const FLAG_MAP *,
+ * PUBLIC: u_int, u_int32_t *, u_int32_t *));
+ */
+void
+__env_fetch_flags(flagmap, mapsize, inflagsp, outflagsp)
+ const FLAG_MAP *flagmap;
+ u_int mapsize;
+ u_int32_t *inflagsp, *outflagsp;
+{
+ const FLAG_MAP *fmp;
+ u_int32_t i;
+
+ *outflagsp = 0;
+ for (i = 0, fmp = flagmap;
+ i < mapsize / sizeof(flagmap[0]); ++i, ++fmp)
+ if (FLD_ISSET(*inflagsp, fmp->outflag))
+ FLD_SET(*outflagsp, fmp->inflag);
+}
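+
+/*
+ * For example, with the EnvMap table above, mapping the external flags
+ * DB_TXN_NOSYNC | DB_NOMMAP through __env_map_flags sets the internal
+ * DB_ENV_TXN_NOSYNC and DB_ENV_NOMMAP bits (clearing each input bit as it
+ * is consumed), and a later __env_fetch_flags pass over the same table
+ * translates those internal bits back into DB_TXN_NOSYNC | DB_NOMMAP for
+ * DB_ENV->get_flags.
+ */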
+
+static int
+__env_get_flags(dbenv, flagsp)
+ DB_ENV *dbenv;
+ u_int32_t *flagsp;
+{
+ ENV *env;
+ DB_THREAD_INFO *ip;
+
+ __env_fetch_flags(EnvMap, sizeof(EnvMap), &dbenv->flags, flagsp);
+
+ env = dbenv->env;
+ /* Some flags are persisted in the regions. */
+ if (env->reginfo != NULL &&
+ ((REGENV *)env->reginfo->primary)->panic != 0)
+ FLD_SET(*flagsp, DB_PANIC_ENVIRONMENT);
+
+ /* If the hotbackup counter is positive, set the flag indicating so. */
+ if (TXN_ON(env)) {
+ ENV_ENTER(env, ip);
+ TXN_SYSTEM_LOCK(env);
+ if (((DB_TXNREGION *)
+ env->tx_handle->reginfo.primary)->n_hotbackup > 0)
+ FLD_SET(*flagsp, DB_HOTBACKUP_IN_PROGRESS);
+ TXN_SYSTEM_UNLOCK(env);
+ ENV_LEAVE(env, ip);
+ }
+
+ return (0);
+}
+
+/*
+ * __env_set_flags --
+ * DB_ENV->set_flags.
+ *
+ * PUBLIC: int __env_set_flags __P((DB_ENV *, u_int32_t, int));
+ */
+int
+__env_set_flags(dbenv, flags, on)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+ int on;
+{
+ ENV *env;
+ DB_THREAD_INFO *ip;
+ u_int32_t mapped_flags;
+ int mem_on, ret;
+
+ env = dbenv->env;
+
+#define OK_FLAGS \
+ (DB_AUTO_COMMIT | DB_CDB_ALLDB | DB_DATABASE_LOCKING | \
+ DB_DIRECT_DB | DB_DSYNC_DB | DB_MULTIVERSION | \
+ DB_NOLOCKING | DB_NOMMAP | DB_NOPANIC | DB_OVERWRITE | \
+ DB_PANIC_ENVIRONMENT | DB_REGION_INIT | \
+ DB_TIME_NOTGRANTED | DB_TXN_NOSYNC | DB_TXN_NOWAIT | \
+ DB_TXN_SNAPSHOT | DB_TXN_WRITE_NOSYNC | DB_YIELDCPU | \
+ DB_HOTBACKUP_IN_PROGRESS | DB_NOFLUSH)
+
+ if (LF_ISSET(~OK_FLAGS))
+ return (__db_ferr(env, "DB_ENV->set_flags", 0));
+ if (on) {
+ if ((ret = __db_fcchk(env, "DB_ENV->set_flags",
+ flags, DB_TXN_NOSYNC, DB_TXN_WRITE_NOSYNC)) != 0)
+ return (ret);
+ if (LF_ISSET(DB_DIRECT_DB) && __os_support_direct_io() == 0) {
+ __db_errx(env,
+ "DB_ENV->set_flags: direct I/O either not configured or not supported");
+ return (EINVAL);
+ }
+ }
+
+ if (LF_ISSET(DB_CDB_ALLDB))
+ ENV_ILLEGAL_AFTER_OPEN(env,
+ "DB_ENV->set_flags: DB_CDB_ALLDB");
+ if (LF_ISSET(DB_PANIC_ENVIRONMENT)) {
+ ENV_ILLEGAL_BEFORE_OPEN(env,
+ "DB_ENV->set_flags: DB_PANIC_ENVIRONMENT");
+ if (on) {
+ __db_errx(env, DB_STR("1558",
+ "Environment panic set"));
+ (void)__env_panic(env, DB_RUNRECOVERY);
+ } else
+ __env_panic_set(env, 0);
+ }
+ if (LF_ISSET(DB_REGION_INIT))
+ ENV_ILLEGAL_AFTER_OPEN(env,
+ "DB_ENV->set_flags: DB_REGION_INIT");
+
+ /*
+ * DB_LOG_IN_MEMORY, DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC are
+ * mutually incompatible. If we're setting one of them, clear all
+ * current settings. If the environment is open, check to see that
+ * logging is not in memory.
+ */
+ if (on && LF_ISSET(DB_TXN_NOSYNC | DB_TXN_WRITE_NOSYNC)) {
+ F_CLR(dbenv, DB_ENV_TXN_NOSYNC | DB_ENV_TXN_WRITE_NOSYNC);
+ if (!F_ISSET(env, ENV_OPEN_CALLED)) {
+ if ((ret =
+ __log_set_config(dbenv, DB_LOG_IN_MEMORY, 0)) != 0)
+ return (ret);
+ } else if (LOGGING_ON(env)) {
+ if ((ret = __log_get_config(dbenv,
+ DB_LOG_IN_MEMORY, &mem_on)) != 0)
+ return (ret);
+ if (mem_on == 1) {
+ __db_errx(env, DB_STR("1559",
+ "DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC"
+ " may not be used with DB_LOG_IN_MEMORY"));
+ return (EINVAL);
+ }
+ }
+ }
+
+ /*
+ * Settings of DB_HOTBACKUP_IN_PROGRESS are reference-counted
+ * in REGENV.
+ */
+ if (LF_ISSET(DB_HOTBACKUP_IN_PROGRESS)) {
+ /* You can't take a hot backup without transactions. */
+ ENV_REQUIRES_CONFIG(env, env->tx_handle,
+ "DB_ENV->set_flags: DB_HOTBACKUP_IN_PROGRESS", DB_INIT_TXN);
+
+ ENV_ENTER(env, ip);
+ ret = __env_set_backup(env, on);
+ ENV_LEAVE(env, ip);
+ if (ret != 0)
+ return (ret);
+ }
+
+ mapped_flags = 0;
+ __env_map_flags(EnvMap, sizeof(EnvMap), &flags, &mapped_flags);
+ if (on)
+ F_SET(dbenv, mapped_flags);
+ else
+ F_CLR(dbenv, mapped_flags);
+
+ return (0);
+}
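+
+/*
+ * A small usage sketch of the method above: an application willing to lose
+ * the most recently committed transactions on a system crash might call
+ *
+ *	if ((ret = dbenv->set_flags(dbenv, DB_TXN_NOSYNC, 1)) != 0)
+ *		return (ret);
+ *
+ * Turning on DB_TXN_NOSYNC clears any previous DB_TXN_WRITE_NOSYNC setting,
+ * and the call fails with EINVAL if the environment is already open with
+ * logging configured in memory.
+ */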
+
+/*
+ * __env_set_backup --
+ * PUBLIC: int __env_set_backup __P((ENV *, int));
+ */
+int
+__env_set_backup(env, on)
+ ENV *env;
+ int on;
+{
+ DB_TXNREGION *tenv;
+ int needs_checkpoint, ret;
+
+ tenv = (DB_TXNREGION *)env->tx_handle->reginfo.primary;
+ needs_checkpoint = 0;
+
+ TXN_SYSTEM_LOCK(env);
+ if (on) {
+ tenv->n_hotbackup++;
+ if (tenv->n_bulk_txn > 0)
+ needs_checkpoint = 1;
+ } else {
+ if (tenv->n_hotbackup == 0)
+ needs_checkpoint = -1; /* signal count error */
+ else
+ tenv->n_hotbackup--;
+ }
+ TXN_SYSTEM_UNLOCK(env);
+
+ if (needs_checkpoint == -1) {
+ __db_errx(env, DB_STR("1560",
+ "Attempt to decrement hotbackup counter past zero"));
+ return (EINVAL);
+ }
+
+ if (needs_checkpoint && (ret = __txn_checkpoint(env, 0, 0, 0)))
+ return (ret);
+ return (0);
+}
+
+static int
+__env_get_data_dirs(dbenv, dirpp)
+ DB_ENV *dbenv;
+ const char ***dirpp;
+{
+ *dirpp = (const char **)dbenv->db_data_dir;
+ return (0);
+}
+
+/*
+ * __env_set_data_dir --
+ * DB_ENV->set_data_dir.
+ *
+ * PUBLIC: int __env_set_data_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_set_data_dir(dbenv, dir)
+ DB_ENV *dbenv;
+ const char *dir;
+{
+ int ret;
+
+ if ((ret = __env_add_data_dir(dbenv, dir)) != 0)
+ return (ret);
+
+ if (dbenv->data_next == 1)
+ return (__env_set_create_dir(dbenv, dir));
+
+ return (0);
+}
+
+/*
+ * __env_add_data_dir --
+ * DB_ENV->add_data_dir.
+ *
+ * PUBLIC: int __env_add_data_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_add_data_dir(dbenv, dir)
+ DB_ENV *dbenv;
+ const char *dir;
+{
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->add_data_dir");
+
+ /*
+ * The array is NULL-terminated so it can be returned by get_data_dirs
+ * without a length.
+ */
+
+#define DATA_INIT_CNT 20 /* Start with 20 data slots. */
+ if (dbenv->db_data_dir == NULL) {
+ if ((ret = __os_calloc(env, DATA_INIT_CNT,
+ sizeof(char **), &dbenv->db_data_dir)) != 0)
+ return (ret);
+ dbenv->data_cnt = DATA_INIT_CNT;
+ } else if (dbenv->data_next == dbenv->data_cnt - 2) {
+ dbenv->data_cnt *= 2;
+ if ((ret = __os_realloc(env,
+ (u_int)dbenv->data_cnt * sizeof(char **),
+ &dbenv->db_data_dir)) != 0)
+ return (ret);
+ }
+
+ ret = __os_strdup(env,
+ dir, &dbenv->db_data_dir[dbenv->data_next++]);
+ dbenv->db_data_dir[dbenv->data_next] = NULL;
+ return (ret);
+}
+
+/*
+ * __env_set_create_dir --
+ * DB_ENV->set_create_dir.
+ * The list of directories cannot change after the environment is opened,
+ * and setting a pointer is atomic, so we do not need a mutex here even if
+ * multiple threads are using the DB_ENV handle.
+ *
+ * PUBLIC: int __env_set_create_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_set_create_dir(dbenv, dir)
+ DB_ENV *dbenv;
+ const char *dir;
+{
+ ENV *env;
+ int i;
+
+ env = dbenv->env;
+
+ for (i = 0; i < dbenv->data_next; i++)
+ if (strcmp(dir, dbenv->db_data_dir[i]) == 0)
+ break;
+
+ if (i == dbenv->data_next) {
+ __db_errx(env, DB_STR_A("1561",
+ "Directory %s not in environment list.", "%s"), dir);
+ return (EINVAL);
+ }
+
+ dbenv->db_create_dir = dbenv->db_data_dir[i];
+ return (0);
+}
+
+static int
+__env_get_create_dir(dbenv, dirp)
+ DB_ENV *dbenv;
+ const char **dirp;
+{
+ *dirp = dbenv->db_create_dir;
+ return (0);
+}
+
+static int
+__env_get_intermediate_dir_mode(dbenv, modep)
+ DB_ENV *dbenv;
+ const char **modep;
+{
+ *modep = dbenv->intermediate_dir_mode;
+ return (0);
+}
+
+/*
+ * __env_set_metadata_dir --
+ * DB_ENV->set_metadata_dir.
+ *
+ * PUBLIC: int __env_set_metadata_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_set_metadata_dir(dbenv, dir)
+ DB_ENV *dbenv;
+ const char *dir;
+{
+ ENV *env;
+ int i, ret;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_metadata_dir");
+
+ /* If metadata_dir is not already on data_dir list, add it. */
+ for (i = 0; i < dbenv->data_next; i++)
+ if (strcmp(dir, dbenv->db_data_dir[i]) == 0)
+ break;
+ if (i == dbenv->data_next &&
+ (ret = __env_add_data_dir(dbenv, dir)) != 0) {
+ __db_errx(env, DB_STR_A("1590",
+ "Could not add %s to environment list.", "%s"), dir);
+ return (ret);
+ }
+
+ if (dbenv->db_md_dir != NULL)
+ __os_free(env, dbenv->db_md_dir);
+ return (__os_strdup(env, dir, &dbenv->db_md_dir));
+}
+
+static int
+__env_get_metadata_dir(dbenv, dirp)
+ DB_ENV *dbenv;
+ const char **dirp;
+{
+ *dirp = dbenv->db_md_dir;
+ return (0);
+}
+
+/*
+ * __env_set_data_len --
+ * DB_ENV->set_data_len.
+ *
+ * PUBLIC: int __env_set_data_len __P((DB_ENV *, u_int32_t));
+ */
+int
+__env_set_data_len(dbenv, data_len)
+ DB_ENV *dbenv;
+ u_int32_t data_len;
+{
+
+ dbenv->env->data_len = data_len;
+ return (0);
+}
+
+static int
+__env_get_data_len(dbenv, data_lenp)
+ DB_ENV *dbenv;
+ u_int32_t *data_lenp;
+{
+ *data_lenp = dbenv->env->data_len;
+ return (0);
+}
+
+/*
+ * __env_set_intermediate_dir_mode --
+ * DB_ENV->set_intermediate_dir_mode.
+ *
+ * PUBLIC: int __env_set_intermediate_dir_mode __P((DB_ENV *, const char *));
+ */
+int
+__env_set_intermediate_dir_mode(dbenv, mode)
+ DB_ENV *dbenv;
+ const char *mode;
+{
+ ENV *env;
+ u_int t;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_intermediate_dir_mode");
+
+#define __SETMODE(offset, valid_ch, mask) { \
+ if (mode[offset] == (valid_ch)) \
+ t |= (mask); \
+ else if (mode[offset] != '-') \
+ goto format_err; \
+}
+ t = 0;
+ __SETMODE(0, 'r', S_IRUSR);
+ __SETMODE(1, 'w', S_IWUSR);
+ __SETMODE(2, 'x', S_IXUSR);
+ __SETMODE(3, 'r', S_IRGRP);
+ __SETMODE(4, 'w', S_IWGRP);
+ __SETMODE(5, 'x', S_IXGRP);
+ __SETMODE(6, 'r', S_IROTH);
+ __SETMODE(7, 'w', S_IWOTH);
+ __SETMODE(8, 'x', S_IXOTH);
+ if (mode[9] != '\0' || t == 0) {
+ /*
+ * We disallow modes of 0 -- we use 0 to decide the application
+ * never configured intermediate directory permissions, and we
+ * shouldn't create intermediate directories. Besides, setting
+ * the permissions to 0 makes no sense.
+ */
+format_err: __db_errx(env,
+ "DB_ENV->set_intermediate_dir_mode: illegal mode \"%s\"", mode);
+ return (EINVAL);
+ }
+
+ if (dbenv->intermediate_dir_mode != NULL)
+ __os_free(env, dbenv->intermediate_dir_mode);
+ if ((ret = __os_strdup(env, mode, &dbenv->intermediate_dir_mode)) != 0)
+ return (ret);
+
+ env->dir_mode = (int)t;
+ return (0);
+}
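+
+/*
+ * For example, the mode string "rwxr-x---" sets
+ * S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP (0750 on typical systems), while
+ * "---------" (t == 0) and any string that is not exactly nine characters
+ * of 'r', 'w', 'x' or '-' in the expected positions are rejected with
+ * EINVAL.
+ */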
+
+/*
+ * __env_get_errcall --
+ * {DB_ENV,DB}->get_errcall.
+ *
+ * PUBLIC: void __env_get_errcall __P((DB_ENV *,
+ * PUBLIC: void (**)(const DB_ENV *, const char *, const char *)));
+ */
+void
+__env_get_errcall(dbenv, errcallp)
+ DB_ENV *dbenv;
+ void (**errcallp) __P((const DB_ENV *, const char *, const char *));
+{
+ *errcallp = dbenv->db_errcall;
+}
+
+/*
+ * __env_set_errcall --
+ * {DB_ENV,DB}->set_errcall.
+ *
+ * PUBLIC: void __env_set_errcall __P((DB_ENV *,
+ * PUBLIC: void (*)(const DB_ENV *, const char *, const char *)));
+ */
+void
+__env_set_errcall(dbenv, errcall)
+ DB_ENV *dbenv;
+ void (*errcall) __P((const DB_ENV *, const char *, const char *));
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ F_CLR(env, ENV_NO_OUTPUT_SET);
+ dbenv->db_errcall = errcall;
+}
+
+/*
+ * __env_get_errfile --
+ * {DB_ENV,DB}->get_errfile.
+ *
+ * PUBLIC: void __env_get_errfile __P((DB_ENV *, FILE **));
+ */
+void
+__env_get_errfile(dbenv, errfilep)
+ DB_ENV *dbenv;
+ FILE **errfilep;
+{
+ *errfilep = dbenv->db_errfile;
+}
+
+/*
+ * __env_set_errfile --
+ * {DB_ENV,DB}->set_errfile.
+ *
+ * PUBLIC: void __env_set_errfile __P((DB_ENV *, FILE *));
+ */
+void
+__env_set_errfile(dbenv, errfile)
+ DB_ENV *dbenv;
+ FILE *errfile;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ F_CLR(env, ENV_NO_OUTPUT_SET);
+ dbenv->db_errfile = errfile;
+}
+
+/*
+ * __env_get_errpfx --
+ * {DB_ENV,DB}->get_errpfx.
+ *
+ * PUBLIC: void __env_get_errpfx __P((DB_ENV *, const char **));
+ */
+void
+__env_get_errpfx(dbenv, errpfxp)
+ DB_ENV *dbenv;
+ const char **errpfxp;
+{
+ *errpfxp = dbenv->db_errpfx;
+}
+
+/*
+ * __env_set_errpfx --
+ * {DB_ENV,DB}->set_errpfx.
+ *
+ * PUBLIC: void __env_set_errpfx __P((DB_ENV *, const char *));
+ */
+void
+__env_set_errpfx(dbenv, errpfx)
+ DB_ENV *dbenv;
+ const char *errpfx;
+{
+ dbenv->db_errpfx = errpfx;
+}
+
+static int
+__env_get_feedback(dbenv, feedbackp)
+ DB_ENV *dbenv;
+ void (**feedbackp) __P((DB_ENV *, int, int));
+{
+ if (feedbackp != NULL)
+ *feedbackp = dbenv->db_feedback;
+ return (0);
+}
+
+static int
+__env_set_feedback(dbenv, feedback)
+ DB_ENV *dbenv;
+ void (*feedback) __P((DB_ENV *, int, int));
+{
+ dbenv->db_feedback = feedback;
+ return (0);
+}
+
+/*
+ * __env_get_thread_id_fn --
+ * DB_ENV->get_thread_id_fn
+ */
+static int
+__env_get_thread_id_fn(dbenv, idp)
+ DB_ENV *dbenv;
+ void (**idp) __P((DB_ENV *, pid_t *, db_threadid_t *));
+{
+ if (idp != NULL)
+ *idp = dbenv->thread_id;
+ return (0);
+}
+
+/*
+ * __env_set_thread_id --
+ * DB_ENV->set_thread_id
+ */
+static int
+__env_set_thread_id(dbenv, id)
+ DB_ENV *dbenv;
+ void (*id) __P((DB_ENV *, pid_t *, db_threadid_t *));
+{
+ dbenv->thread_id = id;
+ return (0);
+}
+
+/*
+ * __env_get_thread_id_string_fn --
+ *	DB_ENV->get_thread_id_string_fn
+ */
+static int
+__env_get_thread_id_string_fn(dbenv, thread_id_stringp)
+ DB_ENV *dbenv;
+ char *(**thread_id_stringp)
+ __P((DB_ENV *, pid_t, db_threadid_t, char *));
+{
+ if (thread_id_stringp != NULL)
+ *thread_id_stringp = dbenv->thread_id_string;
+ return (0);
+}
+
+/*
+ * __env_set_thread_id_string --
+ *	DB_ENV->set_thread_id_string
+ */
+static int
+__env_set_thread_id_string(dbenv, thread_id_string)
+ DB_ENV *dbenv;
+ char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *));
+{
+ dbenv->thread_id_string = thread_id_string;
+ return (0);
+}
+
+/*
+ * __env_get_isalive --
+ * DB_ENV->get_isalive
+ */
+static int
+__env_get_isalive(dbenv, is_alivep)
+ DB_ENV *dbenv;
+ int (**is_alivep) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ if (F_ISSET(env, ENV_OPEN_CALLED) && env->thr_nbucket == 0) {
+ __db_errx(env, DB_STR("1562",
+ "is_alive method specified but no thread region allocated"));
+ return (EINVAL);
+ }
+ if (is_alivep != NULL)
+ *is_alivep = dbenv->is_alive;
+ return (0);
+}
+
+/*
+ * __env_set_isalive --
+ * DB_ENV->set_isalive
+ */
+static int
+__env_set_isalive(dbenv, is_alive)
+ DB_ENV *dbenv;
+ int (*is_alive) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ if (F_ISSET(env, ENV_OPEN_CALLED) && env->thr_nbucket == 0) {
+ __db_errx(env, DB_STR("1563",
+ "is_alive method specified but no thread region allocated"));
+ return (EINVAL);
+ }
+ dbenv->is_alive = is_alive;
+ return (0);
+}
+
+/*
+ * __env_get_thread_count --
+ * DB_ENV->get_thread_count
+ */
+static int
+__env_get_thread_count(dbenv, countp)
+ DB_ENV *dbenv;
+ u_int32_t *countp;
+{
+ *countp = dbenv->thr_max;
+ return (0);
+}
+
+/*
+ * __env_set_thread_count --
+ * DB_ENV->set_thread_count
+ *
+ * PUBLIC: int __env_set_thread_count __P((DB_ENV *, u_int32_t));
+ */
+int
+__env_set_thread_count(dbenv, count)
+ DB_ENV *dbenv;
+ u_int32_t count;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_thread_count");
+ dbenv->thr_max = count;
+
+ return (0);
+}
+
+/*
+ * __env_get_msgcall --
+ * {DB_ENV,DB}->get_msgcall.
+ *
+ * PUBLIC: void __env_get_msgcall
+ * PUBLIC: __P((DB_ENV *, void (**)(const DB_ENV *, const char *)));
+ */
+void
+__env_get_msgcall(dbenv, msgcallp)
+ DB_ENV *dbenv;
+ void (**msgcallp) __P((const DB_ENV *, const char *));
+{
+ if (msgcallp != NULL)
+ *msgcallp = dbenv->db_msgcall;
+}
+
+/*
+ * __env_set_msgcall --
+ * {DB_ENV,DB}->set_msgcall.
+ *
+ * PUBLIC: void __env_set_msgcall
+ * PUBLIC: __P((DB_ENV *, void (*)(const DB_ENV *, const char *)));
+ */
+void
+__env_set_msgcall(dbenv, msgcall)
+ DB_ENV *dbenv;
+ void (*msgcall) __P((const DB_ENV *, const char *));
+{
+ dbenv->db_msgcall = msgcall;
+}
+
+/*
+ * __env_get_msgfile --
+ * {DB_ENV,DB}->get_msgfile.
+ *
+ * PUBLIC: void __env_get_msgfile __P((DB_ENV *, FILE **));
+ */
+void
+__env_get_msgfile(dbenv, msgfilep)
+ DB_ENV *dbenv;
+ FILE **msgfilep;
+{
+ *msgfilep = dbenv->db_msgfile;
+}
+
+/*
+ * __env_set_msgfile --
+ * {DB_ENV,DB}->set_msgfile.
+ *
+ * PUBLIC: void __env_set_msgfile __P((DB_ENV *, FILE *));
+ */
+void
+__env_set_msgfile(dbenv, msgfile)
+ DB_ENV *dbenv;
+ FILE *msgfile;
+{
+ dbenv->db_msgfile = msgfile;
+}
+
+/*
+ * __env_set_paniccall --
+ * {DB_ENV,DB}->set_paniccall.
+ *
+ * PUBLIC: int __env_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int)));
+ */
+int
+__env_set_paniccall(dbenv, paniccall)
+ DB_ENV *dbenv;
+ void (*paniccall) __P((DB_ENV *, int));
+{
+ dbenv->db_paniccall = paniccall;
+ return (0);
+}
+
+/*
+ * __env_set_event_notify --
+ * DB_ENV->set_event_notify.
+ */
+static int
+__env_set_event_notify(dbenv, event_func)
+ DB_ENV *dbenv;
+ void (*event_func) __P((DB_ENV *, u_int32_t, void *));
+{
+ dbenv->db_event_func = event_func;
+ return (0);
+}
+
+static int
+__env_get_shm_key(dbenv, shm_keyp)
+ DB_ENV *dbenv;
+ long *shm_keyp; /* !!!: really a key_t *. */
+{
+ *shm_keyp = dbenv->shm_key;
+ return (0);
+}
+
+/*
+ * __env_set_shm_key --
+ * DB_ENV->set_shm_key.
+ *
+ * PUBLIC: int __env_set_shm_key __P((DB_ENV *, long));
+ */
+int
+__env_set_shm_key(dbenv, shm_key)
+ DB_ENV *dbenv;
+ long shm_key; /* !!!: really a key_t. */
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_shm_key");
+
+ dbenv->shm_key = shm_key;
+ return (0);
+}
+
+static int
+__env_get_tmp_dir(dbenv, dirp)
+ DB_ENV *dbenv;
+ const char **dirp;
+{
+ *dirp = dbenv->db_tmp_dir;
+ return (0);
+}
+
+/*
+ * __env_set_tmp_dir --
+ * DB_ENV->set_tmp_dir.
+ *
+ * PUBLIC: int __env_set_tmp_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_set_tmp_dir(dbenv, dir)
+ DB_ENV *dbenv;
+ const char *dir;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ if (dbenv->db_tmp_dir != NULL)
+ __os_free(env, dbenv->db_tmp_dir);
+ return (__os_strdup(env, dir, &dbenv->db_tmp_dir));
+}
+
+static int
+__env_get_verbose(dbenv, which, onoffp)
+ DB_ENV *dbenv;
+ u_int32_t which;
+ int *onoffp;
+{
+ switch (which) {
+ case DB_VERB_BACKUP:
+ case DB_VERB_DEADLOCK:
+ case DB_VERB_FILEOPS:
+ case DB_VERB_FILEOPS_ALL:
+ case DB_VERB_RECOVERY:
+ case DB_VERB_REGISTER:
+ case DB_VERB_REPLICATION:
+ case DB_VERB_REP_ELECT:
+ case DB_VERB_REP_LEASE:
+ case DB_VERB_REP_MISC:
+ case DB_VERB_REP_MSGS:
+ case DB_VERB_REP_SYNC:
+ case DB_VERB_REP_SYSTEM:
+ case DB_VERB_REP_TEST:
+ case DB_VERB_REPMGR_CONNFAIL:
+ case DB_VERB_REPMGR_MISC:
+ case DB_VERB_WAITSFOR:
+ *onoffp = FLD_ISSET(dbenv->verbose, which) ? 1 : 0;
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/*
+ * __env_set_verbose --
+ * DB_ENV->set_verbose.
+ *
+ * PUBLIC: int __env_set_verbose __P((DB_ENV *, u_int32_t, int));
+ */
+int
+__env_set_verbose(dbenv, which, on)
+ DB_ENV *dbenv;
+ u_int32_t which;
+ int on;
+{
+ switch (which) {
+ case DB_VERB_BACKUP:
+ case DB_VERB_DEADLOCK:
+ case DB_VERB_FILEOPS:
+ case DB_VERB_FILEOPS_ALL:
+ case DB_VERB_RECOVERY:
+ case DB_VERB_REGISTER:
+ case DB_VERB_REPLICATION:
+ case DB_VERB_REP_ELECT:
+ case DB_VERB_REP_LEASE:
+ case DB_VERB_REP_MISC:
+ case DB_VERB_REP_MSGS:
+ case DB_VERB_REP_SYNC:
+ case DB_VERB_REP_SYSTEM:
+ case DB_VERB_REP_TEST:
+ case DB_VERB_REPMGR_CONNFAIL:
+ case DB_VERB_REPMGR_MISC:
+ case DB_VERB_WAITSFOR:
+ if (on)
+ FLD_SET(dbenv->verbose, which);
+ else
+ FLD_CLR(dbenv->verbose, which);
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/*
+ * __db_mi_env --
+ * Method illegally called with public environment.
+ *
+ * PUBLIC: int __db_mi_env __P((ENV *, const char *));
+ */
+int
+__db_mi_env(env, name)
+ ENV *env;
+ const char *name;
+{
+ __db_errx(env, DB_STR_A("1564",
+ "%s: method not permitted when environment specified", "%s"),
+ name);
+ return (EINVAL);
+}
+
+/*
+ * __db_mi_open --
+ * Method illegally called after open.
+ *
+ * PUBLIC: int __db_mi_open __P((ENV *, const char *, int));
+ */
+int
+__db_mi_open(env, name, after)
+ ENV *env;
+ const char *name;
+ int after;
+{
+ __db_errx(env, DB_STR_A("1565",
+ "%s: method not permitted %s handle's open method", "%s %s"),
+ name, after ? DB_STR_P("after") : DB_STR_P("before"));
+ return (EINVAL);
+}
+
+/*
+ * __env_not_config --
+ * Method or function called without required configuration.
+ *
+ * PUBLIC: int __env_not_config __P((ENV *, char *, u_int32_t));
+ */
+int
+__env_not_config(env, i, flags)
+ ENV *env;
+ char *i;
+ u_int32_t flags;
+{
+ char *sub;
+ int is_sub;
+
+ is_sub = 1;
+
+ switch (flags) {
+ case DB_INIT_CDB:
+ sub = "DB_INIT_CDB";
+ is_sub = 0;
+ break;
+ case DB_INIT_LOCK:
+ sub = "locking";
+ break;
+ case DB_INIT_LOG:
+ sub = "logging";
+ break;
+ case DB_INIT_MPOOL:
+ sub = "memory pool";
+ break;
+ case DB_INIT_MUTEX:
+ sub = "mutex";
+ break;
+ case DB_INIT_REP:
+ sub = "replication";
+ break;
+ case DB_INIT_TXN:
+ sub = "transaction";
+ break;
+ default:
+ sub = "<unspecified>";
+ break;
+ }
+
+ if (is_sub) {
+ __db_errx(env, DB_STR_A("1566",
+ "%s interface requires an environment configured for the %s subsystem",
+ "%s %s"), i, sub);
+ } else {
+ __db_errx(env, DB_STR_A("1587",
+ "%s interface requires an environment configured with %s",
+ "%s %s"), i, sub);
+ }
+
+ return (EINVAL);
+}
+
+/*
+ * __env_get_timeout --
+ * DB_ENV->get_timeout
+ */
+static int
+__env_get_timeout(dbenv, timeoutp, flags)
+ DB_ENV *dbenv;
+ db_timeout_t *timeoutp;
+ u_int32_t flags;
+{
+ int ret;
+
+ ret = 0;
+ if (flags == DB_SET_REG_TIMEOUT) {
+ *timeoutp = dbenv->envreg_timeout;
+ } else
+ ret = __lock_get_env_timeout(dbenv, timeoutp, flags);
+ return (ret);
+}
+
+/*
+ * __env_set_timeout --
+ * DB_ENV->set_timeout
+ *
+ * PUBLIC: int __env_set_timeout __P((DB_ENV *, db_timeout_t, u_int32_t));
+ */
+int
+__env_set_timeout(dbenv, timeout, flags)
+ DB_ENV *dbenv;
+ db_timeout_t timeout;
+ u_int32_t flags;
+{
+ int ret;
+
+ ret = 0;
+ if (flags == DB_SET_REG_TIMEOUT)
+ dbenv->envreg_timeout = timeout;
+ else
+ ret = __lock_set_env_timeout(dbenv, timeout, flags);
+ return (ret);
+}
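+
+/*
+ * Illustrative sketch, not built as part of the library: setting the
+ * DB_REGISTER registration timeout and a lock timeout through the public
+ * handle.  Timeouts are in microseconds; the values are arbitrary examples.
+ */
+#if 0
+#include <db.h>
+
+static int
+example_set_timeouts(DB_ENV *dbenv)
+{
+	int ret;
+
+	/* Wait up to five seconds for the registration file lock. */
+	if ((ret = dbenv->set_timeout(dbenv,
+	    5000000, DB_SET_REG_TIMEOUT)) != 0)
+		return (ret);
+
+	/* Time out individual lock requests after one second. */
+	return (dbenv->set_timeout(dbenv, 1000000, DB_SET_LOCK_TIMEOUT));
+}
+#endif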
diff --git a/src/env/env_name.c b/src/env/env_name.c
new file mode 100644
index 00000000..a3a0b371
--- /dev/null
+++ b/src/env/env_name.c
@@ -0,0 +1,285 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+static int __db_fullpath
+ __P((ENV *, const char *, const char *, int, int, char **));
+
+#define DB_ADDSTR(add) { \
+ /* \
+ * The string might be NULL or zero-length, and the p[-1] \
+ * might indirect to before the beginning of our buffer. \
+ */ \
+ if ((add) != NULL && (add)[0] != '\0') { \
+ /* If leading slash, start over. */ \
+ if (__os_abspath(add)) { \
+ p = str; \
+ slash = 0; \
+ } \
+ /* Append to the current string. */ \
+ len = strlen(add); \
+ if (slash) \
+ *p++ = PATH_SEPARATOR[0]; \
+ memcpy(p, add, len); \
+ p += len; \
+ slash = strchr(PATH_SEPARATOR, p[-1]) == NULL; \
+ } \
+}
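+
+/*
+ * Standalone sketch, not built as part of the library, of the joining rule
+ * DB_ADDSTR implements: components are appended with a single separator,
+ * and an absolute component restarts the path at the front of the buffer.
+ * A plain '/' stands in for PATH_SEPARATOR and __os_abspath, and the
+ * buffer is simply assumed to be large enough.
+ */
+#if 0
+#include <stdio.h>
+#include <string.h>
+
+static void
+example_addstr(char *str, char **pp, int *slashp, const char *add)
+{
+	size_t len;
+	char *p;
+
+	if (add == NULL || add[0] == '\0')
+		return;
+	p = *pp;
+	if (add[0] == '/') {		/* Absolute path: start over. */
+		p = str;
+		*slashp = 0;
+	}
+	if (*slashp)
+		*p++ = '/';
+	len = strlen(add);
+	memcpy(p, add, len);
+	p += len;
+	*slashp = p[-1] != '/';
+	*pp = p;
+}
+
+static void
+example_fullpath(void)
+{
+	char str[64], *p;
+	int slash;
+
+	p = str;
+	slash = 0;
+	example_addstr(str, &p, &slash, "/home/db");	/* Environment home. */
+	example_addstr(str, &p, &slash, "data1");	/* Data directory. */
+	example_addstr(str, &p, &slash, "a.db");	/* File name. */
+	*p = '\0';
+	printf("%s\n", str);		/* Prints /home/db/data1/a.db. */
+}
+#endif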
+
+/*
+ * __db_fullpath --
+ * Constructs a path name relative to the environment home, and optionally
+ *	checks whether the file or directory exists.
+ */
+static int
+__db_fullpath(env, dir, file, check_file, check_dir, namep)
+ ENV *env;
+ const char *dir;
+ const char *file;
+ int check_file;
+ int check_dir;
+ char **namep;
+{
+ size_t len;
+ const char *home;
+ char *p, *str;
+ int isdir, ret, slash;
+
+ /* All paths are relative to the environment home. */
+ home = (env == NULL) ? NULL : env->db_home;
+
+ len =
+ (home == NULL ? 0 : strlen(home) + 1) +
+ (dir == NULL ? 0 : strlen(dir) + 1) +
+ (file == NULL ? 0 : strlen(file) + 1);
+
+ if ((ret = __os_malloc(env, len, &str)) != 0)
+ return (ret);
+
+ slash = 0;
+ p = str;
+ DB_ADDSTR(home);
+ DB_ADDSTR(dir);
+ *p = '\0';
+ if (check_dir && (__os_exists(env, str, &isdir) != 0 || !isdir)) {
+ __os_free(env, str);
+ return (ENOENT);
+ }
+ DB_ADDSTR(file);
+ *p = '\0';
+
+ /*
+ * If we're opening a data file, see if it exists. If not, keep
+ * trying.
+ */
+ if (check_file && __os_exists(env, str, NULL) != 0) {
+ __os_free(env, str);
+ return (ENOENT);
+ }
+
+ if (namep == NULL)
+ __os_free(env, str);
+ else
+ *namep = str;
+ return (0);
+}
+
+#define DB_CHECKFILE(file, dir, check_file, check_dir, namep, ret_dir) do { \
+ ret = __db_fullpath(env, dir, file, \
+ check_file, check_dir, namep); \
+ if (ret == 0 && (ret_dir) != NULL) \
+ *(ret_dir) = (dir); \
+ if (ret != ENOENT) \
+ return (ret); \
+} while (0)
+
+/*
+ * __db_appname --
+ * Given an optional DB environment, directory and file name and type
+ * of call, build a path based on the ENV->open rules, and return
+ *	it in allocated space.  Dirp can be used to specify a data directory
+ *	to use.  If it is not set and a data directory is used, then dirp
+ *	will be set to point to the directory name.
+ *
+ * PUBLIC: int __db_appname __P((ENV *, APPNAME,
+ * PUBLIC: const char *, const char **, char **));
+ */
+int
+__db_appname(env, appname, file, dirp, namep)
+ ENV *env;
+ APPNAME appname;
+ const char *file;
+ const char **dirp;
+ char **namep;
+{
+ DB_ENV *dbenv;
+ char **ddp;
+ const char *dir;
+ int ret;
+
+ dbenv = env->dbenv;
+ dir = NULL;
+
+ if (namep != NULL)
+ *namep = NULL;
+
+ /*
+ * Absolute path names are never modified. If the file is an absolute
+ * path, we're done.
+ */
+ if (file != NULL && __os_abspath(file))
+ return (__os_strdup(env, file, namep));
+
+ /*
+ * DB_APP_NONE:
+ * DB_HOME/file
+ * DB_APP_DATA:
+ * DB_HOME/DB_DATA_DIR/file
+ * DB_APP_LOG:
+ * DB_HOME/DB_LOG_DIR/file
+ * DB_APP_TMP:
+ * DB_HOME/DB_TMP_DIR/<create>
+ */
+ switch (appname) {
+ case DB_APP_NONE:
+ break;
+ case DB_APP_RECOVER:
+ case DB_APP_DATA:
+ /*
+ * First, step through the data_dir entries, if any, looking
+ * for the file.
+ */
+ if (dbenv != NULL && dbenv->db_data_dir != NULL)
+ for (ddp = dbenv->db_data_dir; *ddp != NULL; ddp++)
+ DB_CHECKFILE(file, *ddp, 1, 0, namep, dirp);
+
+ /* Second, look in the environment home directory. */
+ DB_CHECKFILE(file, NULL, 1, 0, namep, dirp);
+
+ /*
+ * Otherwise, we're going to create. Use the specified
+ * directory unless we're in recovery and it doesn't exist.
+ */
+ if (dirp != NULL && *dirp != NULL)
+ DB_CHECKFILE(file, *dirp, 0,
+ appname == DB_APP_RECOVER, namep, dirp);
+
+ /* Finally, use the create directory, if set. */
+ if (dbenv != NULL && dbenv->db_create_dir != NULL)
+ dir = dbenv->db_create_dir;
+ break;
+ case DB_APP_LOG:
+ if (dbenv != NULL)
+ dir = dbenv->db_log_dir;
+ break;
+ case DB_APP_TMP:
+ if (dbenv != NULL)
+ dir = dbenv->db_tmp_dir;
+ break;
+ case DB_APP_META:
+ if (dbenv != NULL)
+ dir = dbenv->db_md_dir;
+ break;
+ }
+
+ /*
+ * Construct the full path. For temporary files, it is an error if the
+ * directory does not exist: if it doesn't, checking whether millions
+ * of temporary files exist inside it takes a *very* long time.
+ */
+ DB_CHECKFILE(file, dir, 0, appname == DB_APP_TMP, namep, dirp);
+
+ return (ret);
+}
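+
+/*
+ * Illustrative sketch, not built as part of the library: configuring the
+ * directories that drive the DB_APP_DATA search order above, so that a
+ * relative database name such as "a.db" is looked up in each data
+ * directory and then the environment home, and new databases are created
+ * in the create directory.  The directory names are examples only, and
+ * the method names follow the documented DB_ENV API for this release.
+ */
+#if 0
+#include <db.h>
+
+static int
+example_configure_dirs(DB_ENV *dbenv)
+{
+	int ret;
+
+	if ((ret = dbenv->add_data_dir(dbenv, "data1")) != 0 ||
+	    (ret = dbenv->add_data_dir(dbenv, "data2")) != 0 ||
+	    (ret = dbenv->set_create_dir(dbenv, "data1")) != 0 ||
+	    (ret = dbenv->set_lg_dir(dbenv, "logs")) != 0 ||
+	    (ret = dbenv->set_tmp_dir(dbenv, "tmp")) != 0)
+		return (ret);
+	return (0);
+}
+#endif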
+
+/*
+ * __db_tmp_open --
+ * Create a temporary file.
+ *
+ * PUBLIC: int __db_tmp_open __P((ENV *, u_int32_t, DB_FH **));
+ */
+int
+__db_tmp_open(env, oflags, fhpp)
+ ENV *env;
+ u_int32_t oflags;
+ DB_FH **fhpp;
+{
+ pid_t pid;
+ int filenum, i, ipid, ret;
+ char *path;
+ char *firstx, *trv;
+
+ DB_ASSERT(env, fhpp != NULL);
+ *fhpp = NULL;
+
+#define DB_TRAIL "BDBXXXXX"
+ if ((ret = __db_appname(env, DB_APP_TMP, DB_TRAIL, NULL, &path)) != 0)
+ goto done;
+
+ /* Replace the X's with the process ID (in decimal). */
+ __os_id(env->dbenv, &pid, NULL);
+ ipid = (int)pid;
+ if (ipid < 0)
+ ipid = -ipid;
+ for (trv = path + strlen(path); *--trv == 'X'; ipid /= 10)
+ *trv = '0' + (u_char)(ipid % 10);
+ firstx = trv + 1;
+
+ /* Loop, trying to open a file. */
+ for (filenum = 1;; filenum++) {
+ if ((ret = __os_open(env, path, 0,
+ oflags | DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP,
+ DB_MODE_600, fhpp)) == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ /*
+ * !!!:
+ * If we don't get an EEXIST error, then there's something
+ * seriously wrong. Unfortunately, if the implementation
+ * doesn't return EEXIST for O_CREAT and O_EXCL regardless
+ * of other possible errors, we've lost.
+ */
+ if (ret != EEXIST) {
+ __db_err(env, ret, DB_STR_A("1586",
+ "temporary open: %s", "%s"), path);
+ goto done;
+ }
+
+ /*
+ * Generate temporary file names in a backwards-compatible way.
+ * If pid == 12345, the result is:
+ * <path>/DB12345 (tried above, the first time through).
+ * <path>/DBa2345 ... <path>/DBz2345
+ * <path>/DBaa345 ... <path>/DBaz345
+ * <path>/DBba345, and so on.
+ *
+ * XXX
+ * This algorithm is O(n**2) -- that is, creating 100 temporary
+ * files requires 5,000 opens, creating 1000 files requires
+ * 500,000. If applications open a lot of temporary files, we
+ * could improve performance by switching to timestamp-based
+ * file names.
+ */
+ for (i = filenum, trv = firstx; i > 0; i = (i - 1) / 26)
+ if (*trv++ == '\0') {
+ ret = EINVAL;
+ goto done;
+ }
+
+ for (i = filenum; i > 0; i = (i - 1) / 26)
+ *--trv = 'a' + ((i - 1) % 26);
+ }
+done:
+ __os_free(env, path);
+ return (ret);
+}
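+
+/*
+ * Standalone sketch, not built as part of the library, of the
+ * backwards-compatible name generation used above: after the X's in
+ * DB_TRAIL have been replaced by the process ID, each retry overwrites
+ * the leading digits with a base-26 counter, producing BDBa2345,
+ * BDBb2345, ..., BDBz2345, BDBaa345, BDBab345, and so on.
+ */
+#if 0
+#include <stdio.h>
+
+static void
+example_tmp_names(void)
+{
+	char path[] = "BDB12345";	/* After the pid substitution. */
+	char *firstx = path + 3;	/* First character after "BDB". */
+	char *trv;
+	int filenum, i;
+
+	for (filenum = 1; filenum < 30; filenum++) {
+		for (i = filenum, trv = firstx; i > 0; i = (i - 1) / 26)
+			if (*trv++ == '\0')
+				return;		/* Name space exhausted. */
+		for (i = filenum; i > 0; i = (i - 1) / 26)
+			*--trv = 'a' + ((i - 1) % 26);
+		printf("%s\n", path);
+	}
+}
+#endif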
diff --git a/src/env/env_open.c b/src/env/env_open.c
new file mode 100644
index 00000000..7eddca3a
--- /dev/null
+++ b/src/env/env_open.c
@@ -0,0 +1,1262 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+
+static int __env_open_arg __P((DB_ENV *, u_int32_t));
+static int __file_handle_cleanup __P((ENV *));
+
+/*
+ * db_version --
+ * Return legacy version information, including DB Major Version,
+ * DB Minor Version, and DB Patch/Build numbers.
+ *
+ * EXTERN: char *db_version __P((int *, int *, int *));
+ */
+char *
+db_version(majverp, minverp, patchp)
+ int *majverp, *minverp, *patchp;
+{
+ if (majverp != NULL)
+ *majverp = DB_VERSION_MAJOR;
+ if (minverp != NULL)
+ *minverp = DB_VERSION_MINOR;
+ if (patchp != NULL)
+ *patchp = DB_VERSION_PATCH;
+ return ((char *)DB_VERSION_STRING);
+}
+
+/*
+ * db_full_version --
+ * Return complete version information, including Oracle Family,
+ * Oracle Release, DB Major Version, DB Minor Version, and DB
+ * Patch/Build numbers.
+ *
+ * EXTERN: char *db_full_version __P((int *, int *, int *, int *, int *));
+ */
+char *
+db_full_version(familyp, releasep, majverp, minverp, patchp)
+ int *familyp, *releasep, *majverp, *minverp, *patchp;
+{
+ if (familyp != NULL)
+ *familyp = DB_VERSION_FAMILY;
+ if (releasep != NULL)
+ *releasep = DB_VERSION_RELEASE;
+ if (majverp != NULL)
+ *majverp = DB_VERSION_MAJOR;
+ if (minverp != NULL)
+ *minverp = DB_VERSION_MINOR;
+ if (patchp != NULL)
+ *patchp = DB_VERSION_PATCH;
+ return ((char *)DB_VERSION_FULL_STRING);
+}
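+
+/*
+ * Illustrative sketch, not built as part of the library: both calls accept
+ * NULL for any output parameter the caller does not need, and both return
+ * a static string that must not be freed.
+ */
+#if 0
+#include <stdio.h>
+#include <db.h>
+
+int
+main(void)
+{
+	int family, major, minor, patch, release;
+
+	printf("%s\n", db_version(&major, &minor, &patch));
+	printf("%s\n",
+	    db_full_version(&family, &release, &major, &minor, &patch));
+	printf("%d.%d.%d (family %d, release %d)\n",
+	    major, minor, patch, family, release);
+	return (0);
+}
+#endif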
+
+/*
+ * __env_open_pp --
+ * DB_ENV->open pre/post processing.
+ *
+ * PUBLIC: int __env_open_pp __P((DB_ENV *, const char *, u_int32_t, int));
+ */
+int
+__env_open_pp(dbenv, db_home, flags, mode)
+ DB_ENV *dbenv;
+ const char *db_home;
+ u_int32_t flags;
+ int mode;
+{
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->open");
+
+#undef OKFLAGS
+#define OKFLAGS \
+ (DB_CREATE | DB_FAILCHK | DB_FAILCHK_ISALIVE | DB_INIT_CDB | \
+ DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_REP | \
+ DB_INIT_TXN | DB_LOCKDOWN | DB_NO_CHECKPOINT | DB_PRIVATE | \
+ DB_RECOVER | DB_RECOVER_FATAL | DB_REGISTER | DB_SYSTEM_MEM | \
+ DB_THREAD | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+#undef OKFLAGS_CDB
+#define OKFLAGS_CDB \
+ (DB_CREATE | DB_INIT_CDB | DB_INIT_MPOOL | DB_LOCKDOWN | \
+ DB_PRIVATE | DB_SYSTEM_MEM | DB_THREAD | \
+ DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+
+ if ((ret = __db_fchk(env, "DB_ENV->open", flags, OKFLAGS)) != 0)
+ return (ret);
+ if ((ret = __db_fcchk(
+ env, "DB_ENV->open", flags, DB_INIT_CDB, ~OKFLAGS_CDB)) != 0)
+ return (ret);
+
+#if defined(HAVE_MIXED_SIZE_ADDRESSING) && (SIZEOF_CHAR_P == 8)
+ if (F_ISSET(env, DB_PRIVATE)) {
+ __db_errx(env, DB_STR("1589", "DB_PRIVATE is not "
+ "supported by 64-bit applications in "
+ "mixed-size-addressing mode"));
+ return (EINVAL);
+ }
+#endif
+
+ return (__env_open(dbenv, db_home, flags, mode));
+}
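+
+/*
+ * Illustrative sketch, not built as part of the library: creating a handle
+ * and opening a transactional environment with a typical flag combination
+ * accepted by the OKFLAGS check above.  "/var/dbenv" is a placeholder
+ * home directory.
+ */
+#if 0
+#include <db.h>
+
+static int
+example_open_env(DB_ENV **dbenvp)
+{
+	DB_ENV *dbenv;
+	u_int32_t flags;
+	int ret;
+
+	if ((ret = db_env_create(&dbenv, 0)) != 0)
+		return (ret);
+
+	flags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG |
+	    DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD;
+	if ((ret = dbenv->open(dbenv, "/var/dbenv", flags, 0)) != 0) {
+		(void)dbenv->close(dbenv, 0);
+		return (ret);
+	}
+	*dbenvp = dbenv;
+	return (0);
+}
+#endif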
+
+/*
+ * __env_open --
+ * DB_ENV->open.
+ *
+ * PUBLIC: int __env_open __P((DB_ENV *, const char *, u_int32_t, int));
+ */
+int
+__env_open(dbenv, db_home, flags, mode)
+ DB_ENV *dbenv;
+ const char *db_home;
+ u_int32_t flags;
+ int mode;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ u_int32_t orig_flags;
+ int register_recovery, ret, t_ret;
+
+ ip = NULL;
+ env = dbenv->env;
+ register_recovery = 0;
+
+ /* Initial configuration. */
+ if ((ret = __env_config(dbenv, db_home, &flags, mode)) != 0)
+ return (ret);
+
+ /*
+ * Save the DB_ENV handle's configuration flags as set by user-called
+ * configuration methods and the environment directory's DB_CONFIG
+ * file. If we use this DB_ENV structure to recover the existing
+ * environment or to remove an environment we created after failure,
+ * we'll restore the DB_ENV flags to these values.
+ */
+ orig_flags = dbenv->flags;
+
+ /* Check open flags. */
+ if ((ret = __env_open_arg(dbenv, flags)) != 0)
+ return (ret);
+
+ /*
+ * If we're going to register with the environment, that's the first
+ * thing we do.
+ */
+ if (LF_ISSET(DB_REGISTER)) {
+ /*
+ * Through the SQL interface (btree.c) we set
+ * DB_FAILCHK_ISALIVE. When set, we want to run failchk
+ * if a recovery is needed. Set up the infrastructure to run
+		 * it.  SQL applications have no way to specify the thread
+		 * count or an is_alive function, so force them here.
+		 * Failchk is run inside of the register code.
+ */
+ if (LF_ISSET(DB_FAILCHK_ISALIVE)) {
+ (void)__env_set_thread_count(dbenv, 50);
+ dbenv->is_alive = __envreg_isalive;
+ }
+
+ if ((ret =
+ __envreg_register(env, &register_recovery, flags)) != 0)
+ goto err;
+ if (register_recovery) {
+ if (!LF_ISSET(DB_RECOVER)) {
+ __db_errx(env, DB_STR("1567",
+ "The DB_RECOVER flag was not specified, and recovery is needed"));
+ ret = DB_RUNRECOVERY;
+ goto err;
+ }
+ } else
+ LF_CLR(DB_RECOVER);
+ }
+
+ /*
+ * If we're doing recovery, destroy the environment so that we create
+ * all the regions from scratch. The major concern I have is if the
+ * application stomps the environment with a rogue pointer. We have
+ * no way of detecting that, and we could be forced into a situation
+ * where we start up and then crash, repeatedly.
+ *
+ * We do not check any flags like DB_PRIVATE before calling remove.
+ * We don't care if the current environment was private or not, we
+ * want to remove files left over for any reason, from any session.
+ */
+retry: if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))
+#ifdef HAVE_REPLICATION
+ if ((ret = __rep_reset_init(env)) != 0 ||
+ (ret = __env_remove_env(env)) != 0 ||
+#else
+ if ((ret = __env_remove_env(env)) != 0 ||
+#endif
+ (ret = __env_refresh(dbenv, orig_flags, 0)) != 0)
+ goto err;
+
+ if ((ret = __env_attach_regions(dbenv, flags, orig_flags, 1)) != 0)
+ goto err;
+
+ /*
+	 * After attaching to the env, run failchk if we are not doing
+	 * register recovery.  This option is not provided with the
+	 * DB_FAILCHK_ISALIVE flag.
+ */
+ if (LF_ISSET(DB_FAILCHK) && !register_recovery) {
+ ENV_ENTER(env, ip);
+ if ((ret = __env_failchk_int(dbenv)) != 0)
+ goto err;
+ ENV_LEAVE(env, ip);
+ }
+
+err: if (ret != 0)
+ (void)__env_refresh(dbenv, orig_flags, 0);
+
+ if (register_recovery) {
+ /*
+ * If recovery succeeded, release our exclusive lock, other
+ * processes can now proceed.
+ *
+ * If recovery failed, unregister now and let another process
+ * clean up.
+ */
+ if (ret == 0 && (t_ret = __envreg_xunlock(env)) != 0)
+ ret = t_ret;
+ if (ret != 0)
+ (void)__envreg_unregister(env, 1);
+ }
+
+ /*
+ * If the open is called with DB_REGISTER we can potentially skip
+ * running recovery on a panicked environment. We can't check the panic
+ * bit earlier since checking requires opening the environment.
+ * Only retry if DB_RECOVER was specified - the register_recovery flag
+ * indicates that.
+ */
+ if (ret == DB_RUNRECOVERY && !register_recovery &&
+ !LF_ISSET(DB_RECOVER) && LF_ISSET(DB_REGISTER)) {
+ LF_SET(DB_RECOVER);
+ goto retry;
+ }
+
+ return (ret);
+}
+
+/*
+ * __env_open_arg --
+ * DB_ENV->open flags checking.
+ */
+static int
+__env_open_arg(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+ ret = 0;
+
+ if (LF_ISSET(DB_REGISTER)) {
+ if (!__os_support_db_register()) {
+ __db_errx(env, DB_STR("1568",
+ "Berkeley DB library does not support DB_REGISTER on this system"));
+ return (EINVAL);
+ }
+ if ((ret = __db_fcchk(env, "DB_ENV->open", flags,
+ DB_PRIVATE, DB_REGISTER | DB_SYSTEM_MEM)) != 0)
+ return (ret);
+ if (LF_ISSET(DB_CREATE) && !LF_ISSET(DB_INIT_TXN)) {
+ __db_errx(env, DB_STR("1569",
+ "registration requires transaction support"));
+ return (EINVAL);
+ }
+ }
+ /*
+ * Only check for flags compatible with DB_INIT_REP when creating
+ * since otherwise it'll be ignored anyway.
+ */
+ if (LF_ISSET(DB_INIT_REP) && LF_ISSET(DB_CREATE)) {
+ if (!__os_support_replication()) {
+ __db_errx(env, DB_STR("1570",
+ "Berkeley DB library does not support replication on this system"));
+ return (EINVAL);
+ }
+ if (!LF_ISSET(DB_INIT_LOCK)) {
+ __db_errx(env, DB_STR("1571",
+ "replication requires locking support"));
+ return (EINVAL);
+ }
+ if (!LF_ISSET(DB_INIT_TXN)) {
+ __db_errx(env, DB_STR("1572",
+ "replication requires transaction support"));
+ return (EINVAL);
+ }
+ }
+ if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
+ if ((ret = __db_fcchk(env,
+ "DB_ENV->open", flags, DB_RECOVER, DB_RECOVER_FATAL)) != 0)
+ return (ret);
+ if ((ret = __db_fcchk(env,
+ "DB_ENV->open", flags, DB_REGISTER, DB_RECOVER_FATAL)) != 0)
+ return (ret);
+ if (!LF_ISSET(DB_CREATE)) {
+ __db_errx(env, DB_STR("1573",
+ "recovery requires the create flag"));
+ return (EINVAL);
+ }
+ if (!LF_ISSET(DB_INIT_TXN)) {
+ __db_errx(env, DB_STR("1574",
+ "recovery requires transaction support"));
+ return (EINVAL);
+ }
+ }
+ if (LF_ISSET(DB_FAILCHK)) {
+ if (!ALIVE_ON(env)) {
+ __db_errx(env, DB_STR("1575",
+ "DB_FAILCHK requires DB_ENV->is_alive be configured"));
+ return (EINVAL);
+ }
+ if (dbenv->thr_max == 0) {
+ __db_errx(env, DB_STR("1576",
+ "DB_FAILCHK requires DB_ENV->set_thread_count be configured"));
+ return (EINVAL);
+ }
+ }
+
+#ifdef HAVE_MUTEX_THREAD_ONLY
+ /*
+ * Currently we support one kind of mutex that is intra-process only,
+ * POSIX 1003.1 pthreads, because a variety of systems don't support
+ * the full pthreads API, and our only alternative is test-and-set.
+ */
+ if (!LF_ISSET(DB_PRIVATE)) {
+ __db_errx(env, DB_STR("1577",
+ "Berkeley DB library configured to support only private environments"));
+ return (EINVAL);
+ }
+#endif
+
+#ifdef HAVE_MUTEX_FCNTL
+ /*
+ * !!!
+ * We need a file descriptor for fcntl(2) locking. We use the file
+ * handle from the REGENV file for this purpose.
+ *
+ * Since we may be using shared memory regions, e.g., shmget(2), and
+ * not a mapped-in regular file, the backing file may be only a few
+ * bytes in length. So, this depends on the ability to call fcntl to
+ * lock file offsets much larger than the actual physical file. I
+ * think that's safe -- besides, very few systems actually need this
+ * kind of support, SunOS is the only one still in wide use of which
+ * I'm aware.
+ *
+ * The error case is if an application lacks spinlocks and wants to be
+ * threaded. That doesn't work because fcntl will lock the underlying
+ * process, including all its threads.
+ */
+ if (F_ISSET(env, ENV_THREAD)) {
+ __db_errx(env, DB_STR("1578",
+ "architecture lacks fast mutexes: applications cannot be threaded"));
+ return (EINVAL);
+ }
+#endif
+ return (ret);
+}
+
+/*
+ * __env_remove --
+ * DB_ENV->remove.
+ *
+ * PUBLIC: int __env_remove __P((DB_ENV *, const char *, u_int32_t));
+ */
+int
+__env_remove(dbenv, db_home, flags)
+ DB_ENV *dbenv;
+ const char *db_home;
+ u_int32_t flags;
+{
+ ENV *env;
+ int ret, t_ret;
+
+ env = dbenv->env;
+
+#undef OKFLAGS
+#define OKFLAGS \
+ (DB_FORCE | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+
+ /* Validate arguments. */
+ if ((ret = __db_fchk(env, "DB_ENV->remove", flags, OKFLAGS)) != 0)
+ return (ret);
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->remove");
+
+ if ((ret = __env_config(dbenv, db_home, &flags, 0)) != 0)
+ return (ret);
+
+ /*
+ * Turn the environment off -- if the environment is corrupted, this
+ * could fail. Ignore any error if we're forcing the question.
+ */
+ if ((ret = __env_turn_off(env, flags)) == 0 || LF_ISSET(DB_FORCE))
+ ret = __env_remove_env(env);
+
+ if ((t_ret = __env_close(dbenv, 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
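+
+/*
+ * Illustrative sketch, not built as part of the library: removing the
+ * region files of an environment that is not currently in use.  The
+ * remove method destroys the handle whether or not it succeeds, so no
+ * close call follows.  "/var/dbenv" is a placeholder home directory.
+ */
+#if 0
+#include <db.h>
+
+static int
+example_remove_env(void)
+{
+	DB_ENV *dbenv;
+	int ret;
+
+	if ((ret = db_env_create(&dbenv, 0)) != 0)
+		return (ret);
+	return (dbenv->remove(dbenv, "/var/dbenv", 0));
+}
+#endif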
+
+/*
+ * __env_config --
+ * Argument-based initialization.
+ *
+ * PUBLIC: int __env_config __P((DB_ENV *, const char *, u_int32_t *, int));
+ */
+int
+__env_config(dbenv, db_home, flagsp, mode)
+ DB_ENV *dbenv;
+ const char *db_home;
+ u_int32_t *flagsp;
+ int mode;
+{
+ ENV *env;
+ int ret;
+ u_int32_t flags;
+ char *home, home_buf[DB_MAXPATHLEN];
+
+ env = dbenv->env;
+ flags = *flagsp;
+
+ /*
+ * Set the database home.
+ *
+	 * Use db_home by default; this allows utilities to reasonably
+	 * override the environment either explicitly or by using a -h
+	 * option.  Otherwise, use the DB_HOME environment variable if
+	 * it's permitted and initialized.
+ */
+ home = (char *)db_home;
+ if (home == NULL && (LF_ISSET(DB_USE_ENVIRON) ||
+ (LF_ISSET(DB_USE_ENVIRON_ROOT) && __os_isroot()))) {
+ home = home_buf;
+ if ((ret = __os_getenv(
+ env, "DB_HOME", &home, sizeof(home_buf))) != 0)
+ return (ret);
+ /*
+		 * home is set to NULL if __os_getenv failed to find DB_HOME.
+ */
+ }
+ if (home != NULL) {
+ if (env->db_home != NULL)
+ __os_free(env, env->db_home);
+ if ((ret = __os_strdup(env, home, &env->db_home)) != 0)
+ return (ret);
+ }
+
+ /* Save a copy of the DB_ENV->open method flags. */
+ env->open_flags = flags;
+
+ /* Default permissions are read-write for both owner and group. */
+ env->db_mode = mode == 0 ? DB_MODE_660 : mode;
+
+ /* Read the DB_CONFIG file. */
+ if ((ret = __env_read_db_config(env)) != 0)
+ return (ret);
+
+ /*
+ * Update the DB_ENV->open method flags. The copy of the flags might
+	 * have been changed while reading the DB_CONFIG file.
+ */
+ flags = env->open_flags;
+
+ /*
+ * If no temporary directory path was specified in the config file,
+ * choose one.
+ */
+ if (dbenv->db_tmp_dir == NULL && (ret = __os_tmpdir(env, flags)) != 0)
+ return (ret);
+
+ *flagsp = flags;
+ return (0);
+}
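+
+/*
+ * Illustrative sketch, not built as part of the library: with a NULL
+ * db_home argument and DB_USE_ENVIRON set, the home directory is taken
+ * from the DB_HOME environment variable, as described above.  The setenv
+ * call merely stands in for configuration done outside the program.
+ */
+#if 0
+#include <stdlib.h>
+#include <db.h>
+
+static int
+example_open_from_environ(DB_ENV **dbenvp)
+{
+	DB_ENV *dbenv;
+	int ret;
+
+	(void)setenv("DB_HOME", "/var/dbenv", 1);
+
+	if ((ret = db_env_create(&dbenv, 0)) != 0)
+		return (ret);
+	if ((ret = dbenv->open(dbenv, NULL,
+	    DB_CREATE | DB_INIT_MPOOL | DB_USE_ENVIRON, 0)) != 0) {
+		(void)dbenv->close(dbenv, 0);
+		return (ret);
+	}
+	*dbenvp = dbenv;
+	return (0);
+}
+#endif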
+
+/*
+ * __env_close_pp --
+ * DB_ENV->close pre/post processor.
+ *
+ * PUBLIC: int __env_close_pp __P((DB_ENV *, u_int32_t));
+ */
+int
+__env_close_pp(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int rep_check, ret, t_ret;
+ u_int32_t close_flags, flags_orig;
+
+ env = dbenv->env;
+ ret = 0;
+ close_flags = flags_orig = 0;
+
+ /*
+ * Validate arguments, but as a DB_ENV handle destructor, we can't
+ * fail.
+ */
+ if (flags != 0 && flags != DB_FORCESYNC &&
+ (t_ret = __db_ferr(env, "DB_ENV->close", 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+#define DBENV_FORCESYNC 0x00000001
+#define DBENV_CLOSE_REPCHECK 0x00000010
+ if (flags == DB_FORCESYNC)
+ close_flags |= DBENV_FORCESYNC;
+
+ /*
+ * If the environment has panic'd, all we do is try and discard
+ * the important resources.
+ */
+ if (PANIC_ISSET(env)) {
+ /* clean up from registry file */
+ if (dbenv->registry != NULL) {
+ /*
+ * Temporarily set no panic so we do not trigger the
+ * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwr
+ * thus allowing the unregister to happen correctly.
+ */
+ flags_orig = F_ISSET(dbenv, DB_ENV_NOPANIC);
+ F_SET(dbenv, DB_ENV_NOPANIC);
+ (void)__envreg_unregister(env, 0);
+ dbenv->registry = NULL;
+ if (!flags_orig)
+ F_CLR(dbenv, DB_ENV_NOPANIC);
+ }
+
+ /* Close all underlying threads and sockets. */
+ if (IS_ENV_REPLICATED(env))
+ (void)__repmgr_close(env);
+
+ /* Close all underlying file handles. */
+ (void)__file_handle_cleanup(env);
+
+ PANIC_CHECK(env);
+ }
+
+ ENV_ENTER(env, ip);
+
+ rep_check = IS_ENV_REPLICATED(env) ? 1 : 0;
+ if (rep_check) {
+#ifdef HAVE_REPLICATION_THREADS
+ /*
+ * Shut down Replication Manager threads first of all. This
+ * must be done before __env_rep_enter to avoid a deadlock that
+ * could occur if repmgr's background threads try to do a rep
+ * operation that needs __rep_lockout.
+ */
+ if ((t_ret = __repmgr_close(env)) != 0 && ret == 0)
+ ret = t_ret;
+#endif
+ if ((t_ret = __env_rep_enter(env, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ }
+
+ if (rep_check)
+ close_flags |= DBENV_CLOSE_REPCHECK;
+ if ((t_ret = __env_close(dbenv, close_flags)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Don't ENV_LEAVE as we have already detached from the region. */
+ return (ret);
+}
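+
+/*
+ * Illustrative sketch, not built as part of the library: DB_FORCESYNC is
+ * the only flag the destructor accepts; with it, database handles still
+ * open in the environment are synced as they are closed instead of being
+ * closed with DB_NOSYNC (see __env_close below).
+ */
+#if 0
+#include <db.h>
+
+static int
+example_close_env(DB_ENV *dbenv)
+{
+	return (dbenv->close(dbenv, DB_FORCESYNC));
+}
+#endif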
+
+/*
+ * __env_close --
+ * DB_ENV->close.
+ *
+ * PUBLIC: int __env_close __P((DB_ENV *, u_int32_t));
+ */
+int
+__env_close(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ DB *dbp;
+ ENV *env;
+ int ret, rep_check, t_ret;
+ char **p;
+ u_int32_t close_flags;
+
+ env = dbenv->env;
+ ret = 0;
+ close_flags = LF_ISSET(DBENV_FORCESYNC) ? 0 : DB_NOSYNC;
+ rep_check = LF_ISSET(DBENV_CLOSE_REPCHECK);
+
+ /*
+ * Check to see if we were in the middle of restoring transactions and
+ * need to close the open files.
+ */
+ if (TXN_ON(env) && (t_ret = __txn_preclose(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+#ifdef HAVE_REPLICATION
+ if ((t_ret = __rep_env_close(env)) != 0 && ret == 0)
+ ret = t_ret;
+#endif
+
+ /*
+ * Close all databases opened in this environment after the rep region
+	 * is closed.  The rep region's internal database is already closed now.
+ */
+ while ((dbp = TAILQ_FIRST(&env->dblist)) != NULL) {
+ /*
+ * Do not close the handle on a database partition, since it
+ * will be closed when closing the handle on the main database.
+ */
+ while (dbp != NULL && F_ISSET(dbp, DB_AM_PARTDB))
+ dbp = TAILQ_NEXT(dbp, dblistlinks);
+ DB_ASSERT(env, dbp != NULL);
+ /*
+		 * Note the error code but otherwise keep going: we can't
+		 * do anything about the dbp handle anyway if the close
+		 * operation fails, but we do want to return the error to
+		 * the caller.  This is how this function reports errors
+		 * from the various close operations.
+ */
+ if (dbp->alt_close != NULL)
+ t_ret = dbp->alt_close(dbp, close_flags);
+ else
+ t_ret = __db_close(dbp, NULL, close_flags);
+ if (t_ret != 0 && ret == 0)
+ ret = t_ret;
+ }
+
+ /*
+ * Detach from the regions and undo the allocations done by
+ * DB_ENV->open.
+ */
+ if ((t_ret = __env_refresh(dbenv, 0, rep_check)) != 0 && ret == 0)
+ ret = t_ret;
+
+#ifdef HAVE_CRYPTO
+ /*
+ * Crypto comes last, because higher level close functions need
+ * cryptography.
+ */
+ if ((t_ret = __crypto_env_close(env)) != 0 && ret == 0)
+ ret = t_ret;
+#endif
+
+ /* If we're registered, clean up. */
+ if (dbenv->registry != NULL) {
+ (void)__envreg_unregister(env, 0);
+ dbenv->registry = NULL;
+ }
+
+ /* Check we've closed all underlying file handles. */
+ if ((t_ret = __file_handle_cleanup(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Release any string-based configuration parameters we've copied. */
+ if (dbenv->db_log_dir != NULL)
+ __os_free(env, dbenv->db_log_dir);
+ dbenv->db_log_dir = NULL;
+ if (dbenv->db_tmp_dir != NULL)
+ __os_free(env, dbenv->db_tmp_dir);
+ dbenv->db_tmp_dir = NULL;
+ if (dbenv->db_md_dir != NULL)
+ __os_free(env, dbenv->db_md_dir);
+ dbenv->db_md_dir = NULL;
+ if (dbenv->db_data_dir != NULL) {
+ for (p = dbenv->db_data_dir; *p != NULL; ++p)
+ __os_free(env, *p);
+ __os_free(env, dbenv->db_data_dir);
+ dbenv->db_data_dir = NULL;
+ dbenv->data_next = 0;
+ }
+ if (dbenv->intermediate_dir_mode != NULL)
+ __os_free(env, dbenv->intermediate_dir_mode);
+ if (env->db_home != NULL) {
+ __os_free(env, env->db_home);
+ env->db_home = NULL;
+ }
+
+ if (env->backup_handle != NULL) {
+ __os_free(env, env->backup_handle);
+ env->backup_handle = NULL;
+ }
+
+ /* Discard the structure. */
+ __db_env_destroy(dbenv);
+
+ return (ret);
+}
+
+/*
+ * __env_refresh --
+ * Refresh the DB_ENV structure.
+ * PUBLIC: int __env_refresh __P((DB_ENV *, u_int32_t, int));
+ */
+int
+__env_refresh(dbenv, orig_flags, rep_check)
+ DB_ENV *dbenv;
+ u_int32_t orig_flags;
+ int rep_check;
+{
+ DB *ldbp;
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int ret, t_ret;
+
+ env = dbenv->env;
+ ret = 0;
+
+ /*
+ * Release resources allocated by DB_ENV->open, and return it to the
+ * state it was in just before __env_open was called. (This means
+ * state set by pre-open configuration functions must be preserved.)
+ *
+ * Refresh subsystems, in the reverse order they were opened (txn
+ * must be first, it may want to discard locks and flush the log).
+ *
+ * !!!
+ * Note that these functions, like all of __env_refresh, only undo
+ * the effects of __env_open. Functions that undo work done by
+ * db_env_create or by a configuration function should go in
+ * __env_close.
+ */
+ if (TXN_ON(env) &&
+ (t_ret = __txn_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if (LOGGING_ON(env) &&
+ (t_ret = __log_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /*
+	 * Locking should come after logging, because closing the log
+	 * results in files being closed, which may require locks to be
+	 * released.
+ */
+ if (LOCKING_ON(env)) {
+ if (!F_ISSET(env, ENV_THREAD) &&
+ env->env_lref != NULL && (t_ret =
+ __lock_id_free(env, env->env_lref)) != 0 && ret == 0)
+ ret = t_ret;
+ env->env_lref = NULL;
+
+ if ((t_ret = __lock_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+ }
+
+ /* Discard the DB_ENV, ENV handle mutexes. */
+ if ((t_ret = __mutex_free(env, &dbenv->mtx_db_env)) != 0 && ret == 0)
+ ret = t_ret;
+ if ((t_ret = __mutex_free(env, &env->mtx_env)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /*
+ * Discard DB list and its mutex.
+ * Discard the MT mutex.
+ *
+ * !!!
+ * This must be done after we close the log region, because we close
+ * database handles and so acquire this mutex when we close log file
+ * handles.
+ */
+ if (env->db_ref != 0) {
+ __db_errx(env, DB_STR("1579",
+ "Database handles still open at environment close"));
+ TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks)
+ __db_errx(env, DB_STR_A("1580",
+ "Open database handle: %s%s%s", "%s %s %s"),
+ ldbp->fname == NULL ? "unnamed" : ldbp->fname,
+ ldbp->dname == NULL ? "" : "/",
+ ldbp->dname == NULL ? "" : ldbp->dname);
+ if (ret == 0)
+ ret = EINVAL;
+ }
+ TAILQ_INIT(&env->dblist);
+ if ((t_ret = __mutex_free(env, &env->mtx_dblist)) != 0 && ret == 0)
+ ret = t_ret;
+ if ((t_ret = __mutex_free(env, &env->mtx_mt)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if (env->mt != NULL) {
+ __os_free(env, env->mt);
+ env->mt = NULL;
+ }
+
+ if (MPOOL_ON(env)) {
+ /*
+ * If it's a private environment, flush the contents to disk.
+ * Recovery would have put everything back together, but it's
+ * faster and cleaner to flush instead.
+ *
+ * Ignore application max-write configuration, we're shutting
+ * down.
+ */
+ if (F_ISSET(env, ENV_PRIVATE) &&
+ !F_ISSET(dbenv, DB_ENV_NOFLUSH) &&
+ (t_ret = __memp_sync_int(env, NULL, 0,
+ DB_SYNC_CACHE | DB_SYNC_SUPPRESS_WRITE, NULL, NULL)) != 0 &&
+ ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = __memp_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+ }
+
+ /*
+ * If we're included in a shared replication handle count, this
+ * is our last chance to decrement that count.
+ *
+ * !!!
+ * We can't afford to do anything dangerous after we decrement the
+ * handle count, of course, as replication may be proceeding with
+ * client recovery. However, since we're discarding the regions
+ * as soon as we drop the handle count, there's little opportunity
+ * to do harm.
+ */
+ if (rep_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /*
+ * Refresh the replication region.
+ *
+ * Must come after we call __env_db_rep_exit above.
+ */
+ if (REP_ON(env) && (t_ret = __rep_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+#ifdef HAVE_CRYPTO
+ /*
+ * Crypto comes last, because higher level close functions need
+ * cryptography.
+ */
+ if (env->reginfo != NULL &&
+ (t_ret = __crypto_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+#endif
+
+ /*
+ * Mark the thread as out of the env before we get rid of the handles
+ * needed to do so.
+ */
+ if (env->thr_hashtab != NULL &&
+ (t_ret = __env_set_state(env, &ip, THREAD_OUT)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /*
+ * We are about to detach from the mutex region. This is the last
+ * chance we have to acquire/destroy a mutex -- acquire/destroy the
+ * mutex and release our reference.
+ *
+ * !!!
+ * There are two DbEnv methods that care about environment reference
+ * counts: DbEnv.close and DbEnv.remove. The DbEnv.close method is
+ * not a problem because it only decrements the reference count and
+ * no actual resources are discarded -- lots of threads of control
+ * can call DbEnv.close at the same time, and regardless of racing
+ * on the reference count mutex, we wouldn't have a problem. Since
+ * the DbEnv.remove method actually discards resources, we can have
+ * a problem.
+ *
+ * If we decrement the reference count to 0 here, go to sleep, and
+ * the DbEnv.remove method is called, by the time we run again, the
+ * underlying shared regions could have been removed. That's fine,
+ * except we might actually need the regions to resolve outstanding
+ * operations in the various subsystems, and if we don't have hard
+ * OS references to the regions, we could get screwed. Of course,
+ * we should have hard OS references to everything we need, but just
+ * in case, we put off decrementing the reference count as long as
+ * possible.
+ */
+ if ((t_ret = __env_ref_decrement(env)) != 0 && ret == 0)
+ ret = t_ret;
+
+#ifdef HAVE_MUTEX_SUPPORT
+ if (MUTEX_ON(env) &&
+ (t_ret = __mutex_env_refresh(env)) != 0 && ret == 0)
+ ret = t_ret;
+#endif
+ /* Free memory for thread tracking. */
+ if (env->reginfo != NULL) {
+ if (F_ISSET(env, ENV_PRIVATE)) {
+ __env_thread_destroy(env);
+ t_ret = __env_detach(env, 1);
+ } else
+ t_ret = __env_detach(env, 0);
+
+ if (t_ret != 0 && ret == 0)
+ ret = t_ret;
+
+ /*
+ * !!!
+ * Don't free env->reginfo or set the reference to NULL,
+ * that was done by __env_detach().
+ */
+ }
+
+ if (env->recover_dtab.int_dispatch != NULL) {
+ __os_free(env, env->recover_dtab.int_dispatch);
+ env->recover_dtab.int_size = 0;
+ env->recover_dtab.int_dispatch = NULL;
+ }
+ if (env->recover_dtab.ext_dispatch != NULL) {
+ __os_free(env, env->recover_dtab.ext_dispatch);
+ env->recover_dtab.ext_size = 0;
+ env->recover_dtab.ext_dispatch = NULL;
+ }
+
+ dbenv->flags = orig_flags;
+
+ return (ret);
+}
+
+/*
+ * __file_handle_cleanup --
+ * Close any underlying open file handles so we don't leak system
+ * resources.
+ */
+static int
+__file_handle_cleanup(env)
+ ENV *env;
+{
+ DB_FH *fhp;
+
+ if (TAILQ_FIRST(&env->fdlist) == NULL)
+ return (0);
+
+ __db_errx(env, DB_STR("1581",
+ "File handles still open at environment close"));
+ while ((fhp = TAILQ_FIRST(&env->fdlist)) != NULL) {
+ __db_errx(env, DB_STR_A("1582", "Open file handle: %s", "%s"),
+ fhp->name);
+ (void)__os_closehandle(env, fhp);
+ }
+ return (EINVAL);
+}
+
+/*
+ * __env_get_open_flags
+ * DbEnv.get_open_flags method.
+ *
+ * PUBLIC: int __env_get_open_flags __P((DB_ENV *, u_int32_t *));
+ */
+int
+__env_get_open_flags(dbenv, flagsp)
+ DB_ENV *dbenv;
+ u_int32_t *flagsp;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->get_open_flags");
+
+ *flagsp = env->open_flags;
+ return (0);
+}
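+
+/*
+ * Illustrative sketch, not built as part of the library: an application
+ * can join an existing environment with flags of 0 and then ask which
+ * subsystems it was actually created with; see the note in
+ * __env_attach_regions below on how open_flags is reset.
+ */
+#if 0
+#include <db.h>
+
+static int
+example_join_and_query(DB_ENV *dbenv, const char *home)
+{
+	u_int32_t flags;
+	int ret;
+
+	if ((ret = dbenv->open(dbenv, home, 0, 0)) != 0)
+		return (ret);
+	if ((ret = dbenv->get_open_flags(dbenv, &flags)) != 0)
+		return (ret);
+	if (flags & DB_INIT_TXN) {
+		/* The environment was created with transaction support. */
+	}
+	return (0);
+}
+#endif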
+/*
+ * __env_attach_regions --
+ * Perform attaches to env and required regions (subsystems)
+ *
+ * PUBLIC: int __env_attach_regions __P((DB_ENV *, u_int32_t, u_int32_t, int));
+ */
+int
+__env_attach_regions(dbenv, flags, orig_flags, retry_ok)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+ u_int32_t orig_flags;
+ int retry_ok;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ REGINFO *infop;
+ u_int32_t init_flags;
+ int create_ok, rep_check, ret;
+
+ ip = NULL;
+ env = dbenv->env;
+ rep_check = 0;
+
+ /* Convert the DB_ENV->open flags to internal flags. */
+ create_ok = LF_ISSET(DB_CREATE) ? 1 : 0;
+ if (LF_ISSET(DB_LOCKDOWN))
+ F_SET(env, ENV_LOCKDOWN);
+ if (LF_ISSET(DB_PRIVATE))
+ F_SET(env, ENV_PRIVATE);
+ if (LF_ISSET(DB_RECOVER_FATAL))
+ F_SET(env, ENV_RECOVER_FATAL);
+ if (LF_ISSET(DB_SYSTEM_MEM))
+ F_SET(env, ENV_SYSTEM_MEM);
+ if (LF_ISSET(DB_THREAD))
+ F_SET(env, ENV_THREAD);
+
+ /*
+ * Create/join the environment. We pass in the flags of interest to
+ * a thread subsequently joining an environment we create. If we're
+ * not the ones to create the environment, our flags will be updated
+ * to match the existing environment.
+ */
+ init_flags = 0;
+ if (LF_ISSET(DB_INIT_CDB))
+ FLD_SET(init_flags, DB_INITENV_CDB);
+ if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB))
+ FLD_SET(init_flags, DB_INITENV_CDB_ALLDB);
+ if (LF_ISSET(DB_INIT_LOCK))
+ FLD_SET(init_flags, DB_INITENV_LOCK);
+ if (LF_ISSET(DB_INIT_LOG))
+ FLD_SET(init_flags, DB_INITENV_LOG);
+ if (LF_ISSET(DB_INIT_MPOOL))
+ FLD_SET(init_flags, DB_INITENV_MPOOL);
+ if (LF_ISSET(DB_INIT_REP))
+ FLD_SET(init_flags, DB_INITENV_REP);
+ if (LF_ISSET(DB_INIT_TXN))
+ FLD_SET(init_flags, DB_INITENV_TXN);
+ if ((ret = __env_attach(env, &init_flags, create_ok, retry_ok)) != 0)
+ goto err;
+
+ /*
+ * __env_attach will return the saved init_flags field, which contains
+ * the DB_INIT_* flags used when the environment was created.
+ *
+ * We may be joining an environment -- reset our flags to match the
+ * ones in the environment.
+ */
+ if (FLD_ISSET(init_flags, DB_INITENV_CDB))
+ LF_SET(DB_INIT_CDB);
+ if (FLD_ISSET(init_flags, DB_INITENV_LOCK))
+ LF_SET(DB_INIT_LOCK);
+ if (FLD_ISSET(init_flags, DB_INITENV_LOG))
+ LF_SET(DB_INIT_LOG);
+ if (FLD_ISSET(init_flags, DB_INITENV_MPOOL))
+ LF_SET(DB_INIT_MPOOL);
+ if (FLD_ISSET(init_flags, DB_INITENV_REP))
+ LF_SET(DB_INIT_REP);
+ if (FLD_ISSET(init_flags, DB_INITENV_TXN))
+ LF_SET(DB_INIT_TXN);
+ if (FLD_ISSET(init_flags, DB_INITENV_CDB_ALLDB) &&
+ (ret = __env_set_flags(dbenv, DB_CDB_ALLDB, 1)) != 0)
+ goto err;
+
+ /* Initialize for CDB product. */
+ if (LF_ISSET(DB_INIT_CDB)) {
+ LF_SET(DB_INIT_LOCK);
+ F_SET(env, ENV_CDB);
+ }
+
+ /*
+ * Update the flags to match the database environment. The application
+ * may have specified flags of 0 to join the environment, and this line
+ * replaces that value with the flags corresponding to the existing,
+ * underlying set of subsystems. This means the DbEnv.get_open_flags
+ * method returns the flags to open the existing environment instead of
+ * the specific flags passed to the DbEnv.open method.
+ */
+ env->open_flags = flags;
+
+ /*
+ * The DB_ENV structure has now been initialized. Turn off further
+ * use of the DB_ENV structure and most initialization methods, we're
+ * about to act on the values we currently have.
+ */
+ F_SET(env, ENV_OPEN_CALLED);
+
+ infop = env->reginfo;
+
+#ifdef HAVE_MUTEX_SUPPORT
+ /*
+ * Initialize the mutex regions first before ENV_ENTER().
+ * Mutexes need to be 'on' when attaching to an existing env
+ * in order to safely allocate the thread tracking info.
+ */
+ if ((ret = __mutex_open(env, create_ok)) != 0)
+ goto err;
+ /* The MUTEX_REQUIRED() in __env_alloc() expects this to be set. */
+ infop->mtx_alloc = ((REGENV *)infop->primary)->mtx_regenv;
+#endif
+ /*
+ * Initialize thread tracking and enter the API.
+ */
+ if ((ret =
+ __env_thread_init(env, F_ISSET(infop, REGION_CREATE) ? 1 : 0)) != 0)
+ goto err;
+
+ ENV_ENTER(env, ip);
+
+ /*
+ * Initialize the subsystems.
+ */
+ /*
+ * We can now acquire/create mutexes: increment the region's reference
+ * count.
+ */
+ if ((ret = __env_ref_increment(env)) != 0)
+ goto err;
+
+ /*
+ * Initialize the handle mutexes.
+ */
+ if ((ret = __mutex_alloc(env,
+ MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_db_env)) != 0 ||
+ (ret = __mutex_alloc(env,
+ MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &env->mtx_env)) != 0)
+ goto err;
+
+ /*
+ * Initialize the replication area next, so that we can lock out this
+ * call if we're currently running recovery for replication.
+ */
+ if (LF_ISSET(DB_INIT_REP) && (ret = __rep_open(env)) != 0)
+ goto err;
+
+ rep_check = IS_ENV_REPLICATED(env) ? 1 : 0;
+ if (rep_check && (ret = __env_rep_enter(env, 0)) != 0)
+ goto err;
+
+ if (LF_ISSET(DB_INIT_MPOOL)) {
+ if ((ret = __memp_open(env, create_ok)) != 0)
+ goto err;
+
+ /*
+ * BDB does do cache I/O during recovery and when starting up
+ * replication. If creating a new environment, then suppress
+ * any application max-write configuration.
+ */
+ if (create_ok)
+ (void)__memp_set_config(
+ dbenv, DB_MEMP_SUPPRESS_WRITE, 1);
+
+ /*
+ * Initialize the DB list and its mutex. If the mpool is
+ * not initialized, we can't ever open a DB handle, which
+ * is why this code lives here.
+ */
+ TAILQ_INIT(&env->dblist);
+ if ((ret = __mutex_alloc(env, MTX_ENV_DBLIST,
+ DB_MUTEX_PROCESS_ONLY, &env->mtx_dblist)) != 0)
+ goto err;
+
+ /* Register DB's pgin/pgout functions. */
+ if ((ret = __memp_register(
+ env, DB_FTYPE_SET, __db_pgin, __db_pgout)) != 0)
+ goto err;
+ }
+
+ /*
+ * Initialize the ciphering area prior to any running of recovery so
+ * that we can initialize the keys, etc. before recovery, including
+ * the MT mutex.
+ *
+ * !!!
+ * This must be after the mpool init, but before the log initialization
+ * because log_open may attempt to run log_recover during its open.
+ */
+ if (LF_ISSET(DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN) &&
+ (ret = __crypto_region_init(env)) != 0)
+ goto err;
+ if ((ret = __mutex_alloc(
+ env, MTX_TWISTER, DB_MUTEX_PROCESS_ONLY, &env->mtx_mt)) != 0)
+ goto err;
+
+ /*
+ * Transactions imply logging but do not imply locking. While almost
+ * all applications want both locking and logging, it would not be
+ * unreasonable for a single threaded process to want transactions for
+ * atomicity guarantees, but not necessarily need concurrency.
+ */
+ if (LF_ISSET(DB_INIT_LOG | DB_INIT_TXN))
+ if ((ret = __log_open(env)) != 0)
+ goto err;
+ if (LF_ISSET(DB_INIT_LOCK))
+ if ((ret = __lock_open(env)) != 0)
+ goto err;
+
+ if (LF_ISSET(DB_INIT_TXN)) {
+ if ((ret = __txn_open(env)) != 0)
+ goto err;
+
+ /*
+ * If the application is running with transactions, initialize
+ * the function tables.
+ */
+ if ((ret = __env_init_rec(env,
+ ((LOG *)env->lg_handle->reginfo.primary)->persist.version))
+ != 0)
+ goto err;
+ }
+
+ /* Perform recovery for any previous run. */
+ if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
+ (ret = __db_apprec(env, ip, NULL, NULL, 1,
+ LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL | DB_NO_CHECKPOINT))) != 0)
+ goto err;
+
+ /*
+ * If we've created the regions, are running with transactions, and did
+ * not just run recovery, we need to log the fact that the transaction
+ * IDs got reset.
+ *
+ * If we ran recovery, there may be prepared-but-not-yet-committed
+ * transactions that need to be resolved. Recovery resets the minimum
+ * transaction ID and logs the reset if that's appropriate, so we
+ * don't need to do anything here in the recover case.
+ */
+ if (TXN_ON(env) &&
+ !FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) &&
+ F_ISSET(infop, REGION_CREATE) &&
+ !LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
+ (ret = __txn_reset(env)) != 0)
+ goto err;
+
+ /* The database environment is ready for business. */
+ if ((ret = __env_turn_on(env)) != 0)
+ goto err;
+
+ if (rep_check)
+ ret = __env_db_rep_exit(env);
+
+ /* Turn any application-specific max-write configuration back on. */
+ if (LF_ISSET(DB_INIT_MPOOL))
+ (void)__memp_set_config(dbenv, DB_MEMP_SUPPRESS_WRITE, 0);
+
+err: if (ret == 0)
+ ENV_LEAVE(env, ip);
+ else {
+ /*
+ * If we fail after creating regions, panic and remove them.
+ *
+ * !!!
+ * No need to call __env_db_rep_exit, that work is done by the
+ * calls to __env_refresh.
+ */
+ infop = env->reginfo;
+ if (infop != NULL && F_ISSET(infop, REGION_CREATE)) {
+ ret = __env_panic(env, ret);
+
+			/* Refresh the DB_ENV so we can use it to call remove. */
+ (void)__env_refresh(dbenv, orig_flags, rep_check);
+ (void)__env_remove_env(env);
+ (void)__env_refresh(dbenv, orig_flags, 0);
+ } else
+ (void)__env_refresh(dbenv, orig_flags, rep_check);
+ /* clear the fact that the region had been opened */
+ F_CLR(env, ENV_OPEN_CALLED);
+ }
+
+ return (ret);
+}
diff --git a/src/env/env_recover.c b/src/env/env_recover.c
new file mode 100644
index 00000000..9636554a
--- /dev/null
+++ b/src/env/env_recover.c
@@ -0,0 +1,1093 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/fop.h"
+#include "dbinc/btree.h"
+#include "dbinc/hash.h"
+#include "dbinc/heap.h"
+#include "dbinc/mp.h"
+#include "dbinc/qam.h"
+#include "dbinc/txn.h"
+
+#ifndef lint
+static const char copyright[] =
+ "Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.\n";
+#endif
+
+static int __db_log_corrupt __P((ENV *, DB_LSN *));
+static int __env_init_rec_42 __P((ENV *));
+static int __env_init_rec_43 __P((ENV *));
+static int __env_init_rec_46 __P((ENV *));
+static int __env_init_rec_47 __P((ENV *));
+static int __env_init_rec_48 __P((ENV *));
+static int __log_earliest __P((ENV *, DB_LOGC *, int32_t *, DB_LSN *));
+
+static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int));
+static int __log_backup __P((ENV *, DB_LOGC *, DB_LSN *, DB_LSN*));
+
+/*
+ * __db_apprec --
+ * Perform recovery. If max_lsn is non-NULL, then we are trying
+ * to synchronize this system up with another system that has a max
+ * LSN of max_lsn, so we need to roll back sufficiently far for that
+ * to work. See __log_backup for details.
+ *
+ * PUBLIC: int __db_apprec __P((ENV *,
+ * PUBLIC: DB_THREAD_INFO *, DB_LSN *, DB_LSN *, int, u_int32_t));
+ */
+int
+__db_apprec(env, ip, max_lsn, trunclsn, update, flags)
+ ENV *env;
+ DB_THREAD_INFO *ip;
+ DB_LSN *max_lsn, *trunclsn;
+ int update;
+ u_int32_t flags;
+{
+ DBT data;
+ DB_ENV *dbenv;
+ DB_LOGC *logc;
+ DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn, tlsn;
+ DB_LSN *vtrunc_ckp, *vtrunc_lsn;
+ DB_TXNHEAD *txninfo;
+ DB_TXNREGION *region;
+ REGENV *renv;
+ REGINFO *infop;
+ __txn_ckp_args *ckp_args;
+ time_t now, tlow;
+ double nfiles;
+ u_int32_t hi_txn, log_size, txnid;
+ int32_t low;
+ int all_recovered, progress, rectype, ret, t_ret;
+ char *p, *pass;
+ char t1[CTIME_BUFLEN], t2[CTIME_BUFLEN], time_buf[CTIME_BUFLEN];
+
+ COMPQUIET(nfiles, (double)0.001);
+
+ dbenv = env->dbenv;
+ logc = NULL;
+ ckp_args = NULL;
+ hi_txn = TXN_MAXIMUM;
+ txninfo = NULL;
+ pass = DB_STR_P("initial");
+ ZERO_LSN(lsn);
+
+ /*
+ * XXX
+ * Get the log size. No locking required because we're single-threaded
+ * during recovery.
+ */
+ log_size = ((LOG *)env->lg_handle->reginfo.primary)->log_size;
+
+ /*
+ * If we need to, update the env handle timestamp.
+ */
+ if (update && REP_ON(env)) {
+ infop = env->reginfo;
+ renv = infop->primary;
+ (void)time(&renv->rep_timestamp);
+ }
+
+ /* Set in-recovery flags. */
+ F_SET(env->lg_handle, DBLOG_RECOVER);
+ region = env->tx_handle->reginfo.primary;
+ F_SET(region, TXN_IN_RECOVERY);
+
+ /* Allocate a cursor for the log. */
+ if ((ret = __log_cursor(env, &logc)) != 0)
+ goto err;
+
+ /*
+ * If the user is specifying recovery to a particular point in time
+ * or to a particular LSN, find the point to start recovery from.
+ */
+ ZERO_LSN(lowlsn);
+ if (max_lsn != NULL) {
+ if ((ret = __log_backup(env, logc, max_lsn, &lowlsn)) != 0)
+ goto err;
+ } else if (dbenv->tx_timestamp != 0) {
+ if ((ret = __log_earliest(env, logc, &low, &lowlsn)) != 0)
+ goto err;
+ if ((int32_t)dbenv->tx_timestamp < low) {
+ t1[sizeof(t1) - 1] = '\0';
+ (void)strncpy(t1, __os_ctime(
+ &dbenv->tx_timestamp, time_buf), sizeof(t1) - 1);
+ if ((p = strchr(t1, '\n')) != NULL)
+ *p = '\0';
+
+ t2[sizeof(t2) - 1] = '\0';
+ tlow = (time_t)low;
+ (void)strncpy(t2, __os_ctime(
+ &tlow, time_buf), sizeof(t2) - 1);
+ if ((p = strchr(t2, '\n')) != NULL)
+ *p = '\0';
+
+ __db_errx(env, DB_STR_A("1509",
+ "Invalid recovery timestamp %s; earliest time is %s",
+ "%s %s"), t1, t2);
+ ret = EINVAL;
+ goto err;
+ }
+ }
+
+ /*
+ * Recovery is done in three passes:
+ * Pass #0:
+ * We need to find the position from which we will open files.
+ * We need to open files beginning with the earlier of the
+ * most recent checkpoint LSN and a checkpoint LSN before the
+ * recovery timestamp, if specified. We need to be before the
+ * most recent checkpoint LSN because we are going to collect
+ * information about which transactions were begun before we
+ * start rolling forward. Those that were should never be undone
+ * because queue cannot use LSNs to determine what operations can
+	 *	safely be aborted and it cannot roll back operations in
+ * transactions for which there may be records not processed
+ * during recovery. We need to consider earlier points in time
+ * in case we are recovering to a particular timestamp.
+ *
+ * Pass #1:
+ * Read forward through the log from the position found in pass 0
+ * opening and closing files, and recording transactions for which
+ * we've seen their first record (the transaction's prev_lsn is
+ * 0,0). At the end of this pass, we know all transactions for
+ * which we've seen begins and we have the "current" set of files
+ * open.
+ *
+ * Pass #2:
+ * Read backward through the log undoing any uncompleted TXNs.
+ * There are four cases:
+ * 1. If doing catastrophic recovery, we read to the
+ * beginning of the log
+	 *	2. If we are doing normal recovery, then we have to roll
+ * back to the most recent checkpoint LSN.
+ * 3. If we are recovering to a point in time, then we have
+ * to roll back to the checkpoint whose ckp_lsn is earlier
+ * than the specified time. __log_earliest will figure
+ * this out for us.
+ * 4. If we are recovering back to a particular LSN, then
+ * we have to roll back to the checkpoint whose ckp_lsn
+ * is earlier than the max_lsn. __log_backup will figure
+ * that out for us.
+	 *	In case 3, "uncompleted TXNs" include all those that committed
+	 *	after the user's specified timestamp.
+ *
+ * Pass #3:
+ * Read forward through the log from the LSN found in pass #2,
+ * redoing any committed TXNs (which committed after any user-
+ * specified rollback point). During this pass, checkpoint
+ * file information is ignored, and file openings and closings
+ * are redone.
+ *
+ * ckp_lsn -- lsn of the last checkpoint or the first in the log.
+ * first_lsn -- the lsn where the forward passes begin.
+ * last_lsn -- the last lsn in the log, used for feedback
+ * lowlsn -- the lsn we are rolling back to, if we are recovering
+ * to a point in time.
+ * lsn -- temporary use lsn.
+ * stop_lsn -- the point at which forward roll should stop
+ */
+
+ /*
+ * Find out the last lsn, so that we can estimate how far along we
+ * are in recovery. This will help us determine how much log there
+ * is between the first LSN that we're going to be working with and
+ * the last one. We assume that each of the three phases takes the
+ * same amount of time (a false assumption) and then use the %-age
+ * of the amount of log traversed to figure out how much of the
+ * pass we've accomplished.
+ *
+ * If we can't find any log records, we're kind of done.
+ */
+#ifdef UMRW
+ ZERO_LSN(last_lsn);
+#endif
+ memset(&data, 0, sizeof(data));
+ /*
+ * Pass #0
+ * Find the LSN from which we begin OPENFILES.
+ *
+ * If this is a catastrophic recovery, or if no checkpoint exists
+ * in the log, the LSN is the first LSN in the log.
+ *
+ * Otherwise, it is the minimum of (1) the LSN in the last checkpoint
+ * and (2) the LSN in the checkpoint before any specified recovery
+ * timestamp or max_lsn.
+ */
+ /*
+ * Get the first LSN in the log; it's an initial default
+ * even if this is not a catastrophic recovery.
+ */
+ if ((ret = __logc_get(logc, &ckp_lsn, &data, DB_FIRST)) != 0) {
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ else
+ __db_errx(env, DB_STR("1510",
+ "First log record not found"));
+ goto err;
+ }
+ first_lsn = ckp_lsn;
+
+ if (!LF_ISSET(DB_RECOVER_FATAL)) {
+ if ((ret = __txn_getckp(env, &ckp_lsn)) == 0 &&
+ (ret = __logc_get(logc, &ckp_lsn, &data, DB_SET)) == 0) {
+ /* We have a recent checkpoint. This is LSN (1). */
+ if ((ret = __txn_ckp_read(env,
+ data.data, &ckp_args)) != 0) {
+ __db_errx(env, DB_STR_A("1511",
+ "Invalid checkpoint record at [%ld][%ld]",
+ "%ld %ld"), (u_long)ckp_lsn.file,
+ (u_long)ckp_lsn.offset);
+ goto err;
+ }
+ first_lsn = ckp_args->ckp_lsn;
+ __os_free(env, ckp_args);
+ }
+
+ /*
+ * If LSN (2) exists, use it if it's before LSN (1).
+ * (If LSN (1) doesn't exist, first_lsn is the
+ * beginning of the log, so will "win" this check.)
+ *
+ * XXX
+ * In the recovery-to-a-timestamp case, lowlsn is chosen by
+ * __log_earliest, and is the checkpoint LSN of the
+ * *earliest* checkpoint in the unreclaimed log. I
+ * (krinsky) believe that we could optimize this by looking
+ * instead for the LSN of the *latest* checkpoint before
+ * the timestamp of interest, but I'm not sure that this
+ * is worth doing right now. (We have to look for lowlsn
+ * and low anyway, to make sure the requested timestamp is
+ * somewhere in the logs we have, and all that's required
+ * is that we pick *some* checkpoint after the beginning of
+		 * the logs and before the timestamp.)
+ */
+ if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) &&
+ LOG_COMPARE(&lowlsn, &first_lsn) < 0) {
+ first_lsn = lowlsn;
+ }
+ }
+
+ if ((ret = __logc_get(logc, &last_lsn, &data, DB_LAST)) != 0) {
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ else
+ __db_errx(env, DB_STR("1512",
+ "Last log record not found"));
+ goto err;
+ }
+
+ rectype = 0;
+ txnid = 0;
+ do {
+ if (LOG_COMPARE(&lsn, &first_lsn) == 0)
+ break;
+ /* check if we have a recycle record. */
+ if (rectype != DB___txn_recycle)
+ LOGCOPY_32(env, &rectype, data.data);
+ /* txnid is after rectype, which is a u_int32. */
+ LOGCOPY_32(env, &txnid,
+ (u_int8_t *)data.data + sizeof(u_int32_t));
+
+ if (txnid != 0)
+ break;
+ } while ((ret = __logc_get(logc, &lsn, &data, DB_PREV)) == 0);
+
+ /*
+ * There are no transactions, so there is nothing to do unless
+ * we're recovering to an LSN. If we are, we need to proceed since
+ * we'll still need to do a vtruncate based on information we haven't
+ * yet collected.
+ */
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ else if (ret != 0)
+ goto err;
+
+ hi_txn = txnid;
+
+ /* Get the record at first_lsn. */
+ if ((ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) {
+ __db_errx(env, DB_STR_A("1513",
+ "Checkpoint LSN record [%ld][%ld] not found", "%ld %ld"),
+ (u_long)first_lsn.file, (u_long)first_lsn.offset);
+ goto err;
+ }
+
+ if (dbenv->db_feedback != NULL) {
+ if (last_lsn.file == first_lsn.file)
+ nfiles = (double)
+ (last_lsn.offset - first_lsn.offset) / log_size;
+ else
+ nfiles = (double)(last_lsn.file - first_lsn.file) +
+ (double)((log_size - first_lsn.offset) +
+ last_lsn.offset) / log_size;
+ /* We are going to divide by nfiles; make sure it isn't 0. */
+ if (nfiles < 0.001)
+ nfiles = 0.001;
+ }
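+
+	/*
+	 * Worked example, for illustration only: with log_size = 10000000,
+	 * first_lsn = [3][2000000] and last_lsn = [5][4000000], the
+	 * estimate is
+	 *	nfiles = (5 - 3) +
+	 *	    ((10000000 - 2000000) + 4000000) / 10000000 = 3.2
+	 * "files" worth of log between the two LSNs.
+	 */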
+
+ /* Find a low txnid. */
+ ret = 0;
+ if (hi_txn != 0) do {
+ /* txnid is after rectype, which is a u_int32. */
+ LOGCOPY_32(env, &txnid,
+ (u_int8_t *)data.data + sizeof(u_int32_t));
+
+ if (txnid != 0)
+ break;
+ } while ((ret = __logc_get(logc, &lsn, &data, DB_NEXT)) == 0);
+
+ /*
+ * There are no transactions and we're not recovering to an LSN (see
+ * above), so there is nothing to do.
+ */
+ if (ret == DB_NOTFOUND) {
+ if (LOG_COMPARE(&lsn, &last_lsn) != 0)
+ ret = __db_log_corrupt(env, &lsn);
+ else
+ ret = 0;
+ }
+
+ /* Reset to the first lsn. */
+ if (ret != 0 ||
+ (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0)
+ goto err;
+
+ /* Initialize the transaction list. */
+ if ((ret = __db_txnlist_init(env, ip,
+ txnid, hi_txn, max_lsn, &txninfo)) != 0)
+ goto err;
+
+ /*
+ * Pass #1
+ * Run forward through the log starting at the first relevant lsn.
+ */
+ if ((ret = __env_openfiles(env, logc,
+ txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
+ goto err;
+
+ /* If there were no transactions, then we can bail out early. */
+ if (hi_txn == 0 && max_lsn == NULL) {
+ lsn = last_lsn;
+ goto done;
+ }
+
+ /*
+ * Pass #2.
+ *
+ * We used first_lsn to tell us how far back we need to recover,
+ * use it here.
+ */
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
+ __db_msg(env, DB_STR_A("1514",
+ "Recovery starting from [%lu][%lu]", "%lu %lu"),
+ (u_long)first_lsn.file, (u_long)first_lsn.offset);
+
+ pass = DB_STR_P("backward");
+ for (ret = __logc_get(logc, &lsn, &data, DB_LAST);
+ ret == 0 && LOG_COMPARE(&lsn, &first_lsn) >= 0;
+ ret = __logc_get(logc, &lsn, &data, DB_PREV)) {
+ if (dbenv->db_feedback != NULL) {
+ progress = 34 + (int)(33 * (__lsn_diff(&first_lsn,
+ &last_lsn, &lsn, log_size, 0) / nfiles));
+ dbenv->db_feedback(dbenv, DB_RECOVER, progress);
+ }
+
+ tlsn = lsn;
+ ret = __db_dispatch(env, &env->recover_dtab,
+ &data, &tlsn, DB_TXN_BACKWARD_ROLL, txninfo);
+ if (ret != 0) {
+ if (ret != DB_TXN_CKP)
+ goto msgerr;
+ else
+ ret = 0;
+ }
+ }
+ if (ret == DB_NOTFOUND) {
+ if (LOG_COMPARE(&lsn, &first_lsn) > 0)
+ ret = __db_log_corrupt(env, &lsn);
+ else
+ ret = 0;
+ }
+ if (ret != 0)
+ goto err;
+
+ /*
+ * Pass #3. If we are recovering to a timestamp or to an LSN,
+ * we need to make sure that we don't roll-forward beyond that
+ * point because there may be non-transactional operations (e.g.,
+ * closes that would fail). The last_lsn variable is used for
+	 * closes that would fail).  The last_lsn variable is normally used
+	 * for feedback calculations, but we also use it here to set an
+	 * initial stopping point for the forward pass, and then reset it
+	 * appropriately to derive a real stop_lsn that tells how far the
+	 * forward pass should go.
+ pass = DB_STR_P("forward");
+ stop_lsn = last_lsn;
+ if (max_lsn != NULL || dbenv->tx_timestamp != 0)
+ stop_lsn = ((DB_TXNHEAD *)txninfo)->maxlsn;
+
+ for (ret = __logc_get(logc, &lsn, &data, DB_NEXT);
+ ret == 0; ret = __logc_get(logc, &lsn, &data, DB_NEXT)) {
+ if (dbenv->db_feedback != NULL) {
+ progress = 67 + (int)(33 * (__lsn_diff(&first_lsn,
+ &last_lsn, &lsn, log_size, 1) / nfiles));
+ dbenv->db_feedback(dbenv, DB_RECOVER, progress);
+ }
+
+ tlsn = lsn;
+ ret = __db_dispatch(env, &env->recover_dtab,
+ &data, &tlsn, DB_TXN_FORWARD_ROLL, txninfo);
+ if (ret != 0) {
+ if (ret != DB_TXN_CKP)
+ goto msgerr;
+ else
+ ret = 0;
+ }
+ /*
+ * If we are recovering to a timestamp or an LSN,
+ * we need to make sure that we don't try to roll
+ * forward beyond the soon-to-be end of log.
+ */
+ if (LOG_COMPARE(&lsn, &stop_lsn) >= 0)
+ break;
+	}
+ if (ret == DB_NOTFOUND)
+ ret = __db_log_corrupt(env, &lsn);
+ if (ret != 0)
+ goto err;
+
+ if (max_lsn == NULL)
+ region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid;
+
+done:
+ /* We are going to truncate, so we'd best close the cursor. */
+ if (logc != NULL) {
+ if ((ret = __logc_close(logc)) != 0)
+ goto err;
+ logc = NULL;
+ }
+ /*
+ * Also flush the cache before truncating the log. It's recovery,
+ * ignore any application max-write configuration.
+ */
+ if ((ret = __memp_sync_int(env,
+ NULL, 0, DB_SYNC_CACHE | DB_SYNC_SUPPRESS_WRITE, NULL, NULL)) != 0)
+ goto err;
+ if (dbenv->tx_timestamp != 0) {
+ /* Run recovery up to this timestamp. */
+ region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
+ vtrunc_lsn = &((DB_TXNHEAD *)txninfo)->maxlsn;
+ vtrunc_ckp = &((DB_TXNHEAD *)txninfo)->ckplsn;
+ } else if (max_lsn != NULL) {
+ /* This is a HA client syncing to the master. */
+ if (!IS_ZERO_LSN(((DB_TXNHEAD *)txninfo)->ckplsn))
+ region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
+ else if ((ret =
+ __txn_findlastckp(env, &region->last_ckp, max_lsn)) != 0)
+ goto err;
+ vtrunc_lsn = max_lsn;
+ vtrunc_ckp = &((DB_TXNHEAD *)txninfo)->ckplsn;
+ } else {
+ /*
+ * The usual case: we recovered the whole (valid) log; clear
+ * out any partial record after the recovery point.
+ */
+ vtrunc_lsn = &lsn;
+ vtrunc_ckp = &region->last_ckp;
+ }
+ if ((ret = __log_vtruncate(env, vtrunc_lsn, vtrunc_ckp, trunclsn)) != 0)
+ goto err;
+
+ /* If we had no txns, figure out if we need a checkpoint. */
+ if (hi_txn == 0 && __dbreg_log_nofiles(env))
+ LF_SET(DB_NO_CHECKPOINT);
+ /*
+ * Usually we close all files at the end of recovery, unless there are
+ * prepared transactions or errors in the checkpoint.
+ */
+ all_recovered = region->stat.st_nrestores == 0;
+ /*
+ * Log a checkpoint here so subsequent recoveries can skip what's been
+ * done; this is unnecessary for HA rep clients, as they do not write
+ * log records.
+ */
+ if (max_lsn == NULL && !LF_ISSET(DB_NO_CHECKPOINT) &&
+ (ret = __txn_checkpoint(env,
+ 0, 0, DB_CKP_INTERNAL | DB_FORCE)) != 0) {
+ /*
+ * If there was no space for the checkpoint or flushing db
+ * pages, we can still bring the environment up, if only for
+ * read-only access. We must not close the open files because a
+ * subsequent recovery might still need to redo this portion
+ * of the log [#18590].
+ */
+ if (max_lsn == NULL && ret == ENOSPC) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
+ __db_msg(env, DB_STR_A("1515",
+ "Recovery continuing after non-fatal checkpoint error: %s",
+ "%s"), db_strerror(ret));
+ all_recovered = 0;
+ }
+ else
+ goto err;
+ }
+
+ if (all_recovered) {
+ /* Close all the db files that are open. */
+ if ((ret = __dbreg_close_files(env, 0)) != 0)
+ goto err;
+ } else {
+ if ((ret = __dbreg_mark_restored(env)) != 0)
+ goto err;
+ F_SET(env->lg_handle, DBLOG_OPENFILES);
+ }
+
+ if (max_lsn != NULL) {
+ /*
+ * Now we need to open files that should be open in order for
+ * client processing to continue. However, since we've
+ * truncated the log, we need to recompute from where the
+ * openfiles pass should begin.
+ */
+ if ((ret = __log_cursor(env, &logc)) != 0)
+ goto err;
+ if ((ret =
+ __logc_get(logc, &first_lsn, &data, DB_FIRST)) != 0) {
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ else
+ __db_errx(env, DB_STR("1516",
+ "First log record not found"));
+ goto err;
+ }
+ if ((ret = __txn_getckp(env, &first_lsn)) == 0 &&
+ (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) == 0) {
+ /* We have a recent checkpoint. This is LSN (1). */
+ if ((ret = __txn_ckp_read(env,
+ data.data, &ckp_args)) != 0) {
+ __db_errx(env, DB_STR_A("1517",
+ "Invalid checkpoint record at [%ld][%ld]",
+ "%ld %ld"), (u_long)first_lsn.file,
+ (u_long)first_lsn.offset);
+ goto err;
+ }
+ first_lsn = ckp_args->ckp_lsn;
+ __os_free(env, ckp_args);
+ }
+ if ((ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0)
+ goto err;
+ if ((ret = __env_openfiles(env, logc,
+ txninfo, &data, &first_lsn, max_lsn, nfiles, 1)) != 0)
+ goto err;
+ } else if (all_recovered) {
+ /*
+ * If there are no transactions that need resolution, whether
+ * because they are prepared or because recovery will need to
+ * process them, we need to reset the transaction ID space and
+ * log this fact.
+ */
+ if ((rectype != DB___txn_recycle || hi_txn != 0) &&
+ (ret = __txn_reset(env)) != 0)
+ goto err;
+ } else {
+ if ((ret = __txn_recycle_id(env, 0)) != 0)
+ goto err;
+ }
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) {
+ (void)time(&now);
+ __db_msg(env, DB_STR_A("1518",
+ "Recovery complete at %.24s", "%.24s"),
+ __os_ctime(&now, time_buf));
+ __db_msg(env, DB_STR_A("1519",
+ "Maximum transaction ID %lx recovery checkpoint [%lu][%lu]",
+ "%lx %lu %lu"), (u_long)(txninfo == NULL ?
+ TXN_MINIMUM : ((DB_TXNHEAD *)txninfo)->maxid),
+ (u_long)region->last_ckp.file,
+ (u_long)region->last_ckp.offset);
+ }
+
+ if (0) {
+msgerr: __db_errx(env, DB_STR_A("1520",
+ "Recovery function for LSN %lu %lu failed on %s pass",
+ "%lu %lu %s"), (u_long)lsn.file, (u_long)lsn.offset, pass);
+ }
+
+err: if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if (txninfo != NULL)
+ __db_txnlist_end(env, txninfo);
+
+ dbenv->tx_timestamp = 0;
+
+ F_CLR(env->lg_handle, DBLOG_RECOVER);
+ F_CLR(region, TXN_IN_RECOVERY);
+
+ return (ret);
+}
+
+/*
+ * Figure out how many logfiles we have processed. If we are moving
+ * forward (is_forward != 0), then we're computing current - low. If
+ * we are moving backward, we are computing high - current. max is
+ * the number of bytes per logfile.
+ */
+static double
+__lsn_diff(low, high, current, max, is_forward)
+ DB_LSN *low, *high, *current;
+ u_int32_t max;
+ int is_forward;
+{
+ double nf;
+
+ /*
+ * There are three cases in each direction. If you are in the
+ * same file, then all you need to worry about is the difference in
+ * offsets. If you are in different files, then your offsets put
+ * you either more or less than the integral difference in the
+ * number of files -- we need to handle both of those cases.
+ */
+ if (is_forward) {
+ if (current->file == low->file)
+ nf = (double)(current->offset - low->offset) / max;
+ else if (current->offset < low->offset)
+ nf = (double)((current->file - low->file) - 1) +
+ (double)((max - low->offset) + current->offset) /
+ max;
+ else
+ nf = (double)(current->file - low->file) +
+ (double)(current->offset - low->offset) / max;
+ } else {
+ if (current->file == high->file)
+ nf = (double)(high->offset - current->offset) / max;
+ else if (current->offset > high->offset)
+ nf = (double)((high->file - current->file) - 1) +
+ (double)
+ ((max - current->offset) + high->offset) / max;
+ else
+ nf = (double)(high->file - current->file) +
+ (double)(high->offset - current->offset) / max;
+ }
+ return (nf);
+}
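+
+/*
+ * A rough worked example of the arithmetic above (illustrative numbers
+ * only): with 10MB log files (max == 10485760), low == [2][8388608] and
+ * current == [4][2097152], the LSNs are in different files and
+ * current->offset < low->offset, so the forward-direction result is
+ * (4 - 2 - 1) + ((10485760 - 8388608) + 2097152) / 10485760, roughly 1.4
+ * log files processed; the callers divide that by nfiles to produce the
+ * fraction reported through the feedback callback.
+ */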
+
+/*
+ * __log_backup --
+ *
+ * This is used to find the earliest log record to process when a client
+ * is trying to sync up with a master whose max LSN is less than this
+ * client's max lsn; we want to roll back everything after that.
+ *
+ * Find the latest checkpoint whose ckp_lsn is less than the max lsn.
+ */
+static int
+__log_backup(env, logc, max_lsn, start_lsn)
+ ENV *env;
+ DB_LOGC *logc;
+ DB_LSN *max_lsn, *start_lsn;
+{
+ DBT data;
+ DB_LSN lsn;
+ __txn_ckp_args *ckp_args;
+ int ret;
+
+ memset(&data, 0, sizeof(data));
+ ckp_args = NULL;
+
+ if ((ret = __txn_getckp(env, &lsn)) != 0)
+ goto err;
+ while ((ret = __logc_get(logc, &lsn, &data, DB_SET)) == 0) {
+ if ((ret = __txn_ckp_read(env, data.data, &ckp_args)) != 0)
+ return (ret);
+ /*
+ * Follow checkpoints through the log until
+ * we find one with a ckp_lsn less than
+ * or equal max_lsn.
+ */
+ if (LOG_COMPARE(&ckp_args->ckp_lsn, max_lsn) <= 0) {
+ *start_lsn = ckp_args->ckp_lsn;
+ break;
+ }
+
+ lsn = ckp_args->last_ckp;
+ /*
+ * If there are no more checkpoints behind us, we're
+ * done. Break with DB_NOTFOUND.
+ */
+ if (IS_ZERO_LSN(lsn)) {
+ ret = DB_NOTFOUND;
+ break;
+ }
+ __os_free(env, ckp_args);
+ ckp_args = NULL;
+ }
+
+ if (ckp_args != NULL)
+ __os_free(env, ckp_args);
+ /*
+ * If we walked back through all the checkpoints,
+ * set the cursor on the first log record.
+ */
+err: if (IS_ZERO_LSN(*start_lsn) && (ret == 0 || ret == DB_NOTFOUND))
+ ret = __logc_get(logc, start_lsn, &data, DB_FIRST);
+ return (ret);
+}
+
+/*
+ * __log_earliest --
+ *
+ * Return the earliest recovery point for the log files present. The
+ * earliest recovery time is the time stamp of the first checkpoint record
+ * whose checkpoint LSN is greater than the first LSN we process.
+ */
+static int
+__log_earliest(env, logc, lowtime, lowlsn)
+ ENV *env;
+ DB_LOGC *logc;
+ int32_t *lowtime;
+ DB_LSN *lowlsn;
+{
+ __txn_ckp_args *ckpargs;
+ DB_LSN first_lsn, lsn;
+ DBT data;
+ u_int32_t rectype;
+ int cmp, ret;
+
+ memset(&data, 0, sizeof(data));
+
+ /*
+ * Read forward through the log looking for the first checkpoint
+ * record whose ckp_lsn is greater than first_lsn.
+ */
+ for (ret = __logc_get(logc, &first_lsn, &data, DB_FIRST);
+ ret == 0; ret = __logc_get(logc, &lsn, &data, DB_NEXT)) {
+ LOGCOPY_32(env, &rectype, data.data);
+ if (rectype != DB___txn_ckp)
+ continue;
+ if ((ret =
+ __txn_ckp_read(env, data.data, &ckpargs)) == 0) {
+ cmp = LOG_COMPARE(&ckpargs->ckp_lsn, &first_lsn);
+ *lowlsn = ckpargs->ckp_lsn;
+ *lowtime = ckpargs->timestamp;
+
+ __os_free(env, ckpargs);
+ if (cmp >= 0)
+ break;
+ }
+ }
+
+ return (ret);
+}
+
+/*
+ * __env_openfiles --
+ * Perform the pass of recovery that opens files. This is used
+ * both during regular recovery and an initial call to txn_recover (since
+ * we need files open in order to abort prepared but not yet committed
+ * transactions).
+ *
+ * See the comments in db_apprec for a detailed description of the
+ * various recovery passes.
+ *
+ * If we are not doing feedback processing (i.e., we are doing txn_recover
+ * processing and in_recovery is zero), then last_lsn can be NULL.
+ *
+ * PUBLIC: int __env_openfiles __P((ENV *,
+ * PUBLIC: DB_LOGC *, void *, DBT *, DB_LSN *, DB_LSN *, double, int));
+ */
+int
+__env_openfiles(env, logc, txninfo,
+ data, open_lsn, last_lsn, nfiles, in_recovery)
+ ENV *env;
+ DB_LOGC *logc;
+ void *txninfo;
+ DBT *data;
+ DB_LSN *open_lsn, *last_lsn;
+ double nfiles;
+ int in_recovery;
+{
+ DB_ENV *dbenv;
+ DB_LSN lsn, tlsn;
+ u_int32_t log_size;
+ int progress, ret;
+
+ dbenv = env->dbenv;
+
+ /*
+ * XXX
+ * Get the log size. No locking required because we're single-threaded
+ * during recovery.
+ */
+ log_size = ((LOG *)env->lg_handle->reginfo.primary)->log_size;
+
+ lsn = *open_lsn;
+ for (;;) {
+ if (in_recovery && dbenv->db_feedback != NULL) {
+ DB_ASSERT(env, last_lsn != NULL);
+ progress = (int)(33 * (__lsn_diff(open_lsn,
+ last_lsn, &lsn, log_size, 1) / nfiles));
+ dbenv->db_feedback(dbenv, DB_RECOVER, progress);
+ }
+
+ tlsn = lsn;
+ ret = __db_dispatch(env, &env->recover_dtab, data, &tlsn,
+ in_recovery ? DB_TXN_OPENFILES : DB_TXN_POPENFILES,
+ txninfo);
+ if (ret != 0 && ret != DB_TXN_CKP) {
+ __db_errx(env, DB_STR_A("1521",
+ "Recovery function for LSN %lu %lu failed",
+ "%lu %lu"), (u_long)lsn.file, (u_long)lsn.offset);
+ break;
+ }
+ if ((ret = __logc_get(logc, &lsn, data, DB_NEXT)) != 0) {
+ if (ret == DB_NOTFOUND) {
+ if (last_lsn != NULL &&
+ LOG_COMPARE(&lsn, last_lsn) != 0)
+ ret = __db_log_corrupt(env, &lsn);
+ else
+ ret = 0;
+ }
+ break;
+ }
+ }
+
+ return (ret);
+}
+
+static int
+__db_log_corrupt(env, lsnp)
+ ENV *env;
+ DB_LSN *lsnp;
+{
+ __db_errx(env, DB_STR_A("1522",
+ "Log file corrupt at LSN: [%lu][%lu]", "%lu %lu"),
+ (u_long)lsnp->file, (u_long)lsnp->offset);
+ return (EINVAL);
+}
+
+/*
+ * __env_init_rec --
+ *
+ * PUBLIC: int __env_init_rec __P((ENV *, u_int32_t));
+ */
+int
+__env_init_rec(env, version)
+ ENV *env;
+ u_int32_t version;
+{
+ int ret;
+
+ /*
+ * We need to prime the recovery table with the current recovery
+ * functions. Then we overwrite only specific entries based on
+ * each previous version we support.
+ */
+ if ((ret = __bam_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __crdel_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __db_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __dbreg_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __fop_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __ham_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __heap_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __qam_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __repmgr_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+ if ((ret = __txn_init_recover(env, &env->recover_dtab)) != 0)
+ goto err;
+
+ /*
+ * After installing all the current recovery routines, we want to
+ * override them with older versions if we are reading a down rev
+ * log (from a downrev replication master). If a log record is
+ * changed then we must use the previous version for all older
+ * logs. If a record is changed in multiple revisions then the
+ * oldest revision that applies must be used. Therefore we override
+ * the recovery functions in reverse log version order.
+ */
+ /*
+ * DB_LOGVERSION_53 is a strict superset of DB_LOGVERSION_50.
+ * So, only check > DB_LOGVERSION_48p2. If/When log records are
+ * altered, the condition below will need to change.
+ */
+ if (version > DB_LOGVERSION_48p2)
+ goto done;
+ if ((ret = __env_init_rec_48(env)) != 0)
+ goto err;
+ /*
+ * Patch 2 added __db_pg_trunc but did not replace any log records
+ * so we want to override the same functions as in the original release.
+ */
+ if (version >= DB_LOGVERSION_48)
+ goto done;
+ if ((ret = __env_init_rec_47(env)) != 0)
+ goto err;
+ if (version == DB_LOGVERSION_47)
+ goto done;
+ if ((ret = __env_init_rec_46(env)) != 0)
+ goto err;
+ /*
+ * There are no log record/recovery differences between 4.4 and 4.5.
+ * The log version changed due to checksum. There are no log recovery
+ * differences between 4.5 and 4.6. The name of the rep_gen in
+ * txn_checkpoint changed (to spare, since we don't use it anymore).
+ */
+ if (version >= DB_LOGVERSION_44)
+ goto done;
+ if ((ret = __env_init_rec_43(env)) != 0)
+ goto err;
+ if (version == DB_LOGVERSION_43)
+ goto done;
+ if (version != DB_LOGVERSION_42) {
+ __db_errx(env, DB_STR_A("1523", "Unknown version %lu",
+ "%lu"), (u_long)version);
+ ret = EINVAL;
+ goto err;
+ }
+ ret = __env_init_rec_42(env);
+
+done:
+err: return (ret);
+}
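+
+/*
+ * To make the fall-through above concrete (a reading of the code, not an
+ * exhaustive compatibility matrix): for a log written by a 4.7 master,
+ * version == DB_LOGVERSION_47, so the current table is installed first,
+ * then the 4.8 overrides (__env_init_rec_48), then the 4.7 overrides
+ * (__env_init_rec_47), and the function returns before the 4.6 set is
+ * applied. A log at the current version skips all of the overrides.
+ */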
+
+static int
+__env_init_rec_42(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_relink_42_recover, DB___db_relink_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_pg_alloc_42_recover, DB___db_pg_alloc_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_pg_free_42_recover, DB___db_pg_free_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_pg_freedata_42_recover, DB___db_pg_freedata_42)) != 0)
+ goto err;
+#ifdef HAVE_HASH
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __ham_metagroup_42_recover, DB___ham_metagroup_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __ham_groupalloc_42_recover, DB___ham_groupalloc_42)) != 0)
+ goto err;
+#endif
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __txn_ckp_42_recover, DB___txn_ckp_42)) != 0)
+ goto err;
+err:
+ return (ret);
+}
+
+static int
+__env_init_rec_43(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __bam_relink_43_recover, DB___bam_relink_43)) != 0)
+ goto err;
+ /*
+ * We want to use the 4.2-based txn_regop record.
+ */
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __txn_regop_42_recover, DB___txn_regop_42)) != 0)
+ goto err;
+err:
+ return (ret);
+}
+
+static int
+__env_init_rec_46(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __bam_merge_44_recover, DB___bam_merge_44)) != 0)
+ goto err;
+
+err: return (ret);
+}
+
+static int
+__env_init_rec_47(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __bam_split_42_recover, DB___bam_split_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_pg_sort_44_recover, DB___db_pg_sort_44)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_create_42_recover, DB___fop_create_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_write_42_recover, DB___fop_write_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_rename_42_recover, DB___fop_rename_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_rename_noundo_46_recover, DB___fop_rename_noundo_46)) != 0)
+ goto err;
+
+err:
+ return (ret);
+}
+
+static int
+__env_init_rec_48(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_pg_sort_44_recover, DB___db_pg_sort_44)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_addrem_42_recover, DB___db_addrem_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __db_big_42_recover, DB___db_big_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __bam_split_48_recover, DB___bam_split_48)) != 0)
+ goto err;
+#ifdef HAVE_HASH
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __ham_insdel_42_recover, DB___ham_insdel_42)) != 0)
+ goto err;
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __ham_replace_42_recover, DB___ham_replace_42)) != 0)
+ goto err;
+#endif
+err:
+ return (ret);
+}
diff --git a/src/env/env_region.c b/src/env/env_region.c
new file mode 100644
index 00000000..113bea21
--- /dev/null
+++ b/src/env/env_region.c
@@ -0,0 +1,1497 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/mp.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/txn.h"
+
+static int __env_des_get __P((ENV *, REGINFO *, REGINFO *, REGION **));
+static int __env_faultmem __P((ENV *, void *, size_t, int));
+static int __env_sys_attach __P((ENV *, REGINFO *, REGION *));
+static int __env_sys_detach __P((ENV *, REGINFO *, int));
+static void __env_des_destroy __P((ENV *, REGION *));
+static void __env_remove_file __P((ENV *));
+
+/*
+ * __env_attach
+ * Join/create the environment
+ *
+ * PUBLIC: int __env_attach __P((ENV *, u_int32_t *, int, int));
+ */
+int
+__env_attach(env, init_flagsp, create_ok, retry_ok)
+ ENV *env;
+ u_int32_t *init_flagsp;
+ int create_ok, retry_ok;
+{
+ DB_ENV *dbenv;
+ REGENV rbuf, *renv;
+ REGENV_REF ref;
+ REGINFO *infop;
+ REGION *rp, tregion;
+ size_t max, nrw, size;
+ long segid;
+ u_int32_t bytes, i, mbytes, nregions, signature;
+ u_int retry_cnt;
+ int majver, minver, patchver, ret;
+ char buf[sizeof(DB_REGION_FMT) + 20];
+
+ /* Initialization */
+ dbenv = env->dbenv;
+ retry_cnt = 0;
+ signature = __env_struct_sig();
+
+ /* Repeated initialization. */
+loop: renv = NULL;
+ rp = NULL;
+
+ /* Set up the ENV's REG_INFO structure. */
+ if ((ret = __os_calloc(env, 1, sizeof(REGINFO), &infop)) != 0)
+ return (ret);
+ infop->env = env;
+ infop->type = REGION_TYPE_ENV;
+ infop->id = REGION_ID_ENV;
+ infop->flags = REGION_JOIN_OK;
+ if (create_ok)
+ F_SET(infop, REGION_CREATE_OK);
+
+ /* Build the region name. */
+ if (F_ISSET(env, ENV_PRIVATE))
+ ret = __os_strdup(env, "process-private", &infop->name);
+ else {
+ (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
+ ret = __db_appname(env, DB_APP_NONE, buf, NULL, &infop->name);
+ }
+ if (ret != 0)
+ goto err;
+
+ /*
+ * We have to single-thread the creation of the REGENV region. Once
+ * it exists, we can serialize using region mutexes, but until then
+ * we have to be the only player in the game.
+ *
+ * If this is a private environment, we are only called once and there
+ * are no possible race conditions.
+ *
+ * If this is a public environment, we use the filesystem to ensure
+ * the creation of the environment file is single-threaded.
+ *
+ * If the application has specified their own mapping functions, try
+ * and create the region. The application will have to let us know if
+ * it's actually a creation or not, and we'll have to fall-back to a
+ * join if it's not a create.
+ */
+ if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL)
+ goto creation;
+
+ /*
+ * Try to create the file, if we have the authority. We have to ensure
+ * that multiple threads/processes attempting to simultaneously create
+ * the file are properly ordered. Open using the O_CREAT and O_EXCL
+ * flags so that multiple attempts to create the region will return
+ * failure in all but one. POSIX 1003.1 requires that EEXIST be the
+ * errno return value -- I sure hope they're right.
+ */
+ if (create_ok) {
+ if ((ret = __os_open(env, infop->name, 0,
+ DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_REGION,
+ env->db_mode, &env->lockfhp)) == 0)
+ goto creation;
+ if (ret != EEXIST) {
+ __db_err(env, ret, "%s", infop->name);
+ goto err;
+ }
+ }
+
+ /* The region must exist, it's not okay to recreate it. */
+ F_CLR(infop, REGION_CREATE_OK);
+
+ /*
+ * If we couldn't create the file, try and open it. (If that fails,
+ * we're done.)
+ */
+ if ((ret = __os_open(
+ env, infop->name, 0, DB_OSO_REGION, 0, &env->lockfhp)) != 0)
+ goto err;
+
+ /*
+ * !!!
+ * The region may be in system memory not backed by the filesystem
+ * (more specifically, not backed by this file), and we're joining
+ * it. In that case, the process that created it will have written
+ * out a REGENV_REF structure as its only contents. We read that
+ * structure before we do anything further, e.g., we can't just map
+ * that file in and then figure out what's going on.
+ *
+ * All of this noise is because some systems don't have a coherent VM
+ * and buffer cache, and what's worse, when you mix operations on the
+ * VM and buffer cache, half the time you hang the system.
+ *
+ * If the file is the size of an REGENV_REF structure, then we know
+ * the real region is in some other memory. (The only way you get a
+ * file that size is to deliberately write it, as it's smaller than
+ * any possible disk sector created by writing a file or mapping the
+ * file into memory.) In which case, retrieve the structure from the
+ * file and use it to acquire the referenced memory.
+ *
+ * If the structure is larger than a REGENV_REF structure, then this
+ * file is backing the shared memory region, and we just map it into
+ * memory.
+ *
+ * And yes, this makes me want to take somebody and kill them. (I
+ * digress -- but you have no freakin' idea. This is unbelievably
+ * stupid and gross, and I've probably spent six months of my life,
+ * now, trying to make different versions of it work.)
+ */
+ if ((ret = __os_ioinfo(env, infop->name,
+ env->lockfhp, &mbytes, &bytes, NULL)) != 0) {
+ __db_err(env, ret, "%s", infop->name);
+ goto err;
+ }
+
+ /*
+ * !!!
+ * A size_t is OK -- regions get mapped into memory, and so can't
+ * be larger than a size_t.
+ */
+ size = mbytes * MEGABYTE + bytes;
+
+ /*
+ * If the size is less than the size of a REGENV_REF structure, the
+ * region (or, possibly, the REGENV_REF structure) has not yet been
+ * completely written. Shouldn't be possible, but there's no reason
+ * not to wait awhile and try again.
+ *
+ * If the region is precisely the size of a ref, then we don't
+ * have the region here, just the meta-data, which implies that
+ * we are using SYSTEM V shared memory (SYSTEM_MEM). However,
+ * if the flags say that we are using SYSTEM_MEM and the region is
+ * bigger than the ref, something bad has happened -- we are storing
+ * something in the region file other than meta-data and that
+ * shouldn't happen.
+ */
+ if (size < sizeof(ref))
+ goto retry;
+ else {
+
+ if (size == sizeof(ref))
+ F_SET(env, ENV_SYSTEM_MEM);
+ else if (F_ISSET(env, ENV_SYSTEM_MEM)) {
+ ret = EINVAL;
+ __db_err(env, ret, DB_STR_A("1535",
+ "%s: existing environment not created in system memory",
+ "%s"), infop->name);
+ goto err;
+ } else {
+ if ((ret = __os_read(env, env->lockfhp, &rbuf,
+ sizeof(rbuf), &nrw)) != 0 ||
+ nrw < (size_t)sizeof(rbuf) ||
+ (ret = __os_seek(env,
+ env->lockfhp, 0, 0, rbuf.region_off)) != 0) {
+ __db_err(env, ret, DB_STR_A("1536",
+ "%s: unable to read region info", "%s"),
+ infop->name);
+ goto err;
+ }
+ }
+
+ if ((ret = __os_read(env, env->lockfhp, &ref,
+ sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) {
+ if (ret == 0)
+ ret = EIO;
+ __db_err(env, ret, DB_STR_A("1537",
+ "%s: unable to read system-memory information",
+ "%s"), infop->name);
+ goto err;
+ }
+ size = ref.size;
+ max = ref.max;
+ segid = ref.segid;
+ }
+
+#ifndef HAVE_MUTEX_FCNTL
+ /*
+ * If we're not doing fcntl locking, we can close the file handle. We
+ * no longer need it and the less contact between the buffer cache and
+ * the VM, the better.
+ */
+ (void)__os_closehandle(env, env->lockfhp);
+ env->lockfhp = NULL;
+#endif
+
+ /* Call the region join routine to acquire the region. */
+ memset(&tregion, 0, sizeof(tregion));
+ tregion.size = (roff_t)size;
+ tregion.max = (roff_t)max;
+ tregion.segid = segid;
+ if ((ret = __env_sys_attach(env, infop, &tregion)) != 0)
+ goto err;
+
+user_map_functions:
+ /*
+ * The environment's REGENV structure has to live at offset 0 instead
+ * of the usual alloc information. Set the primary reference and
+ * correct the "head" value to reference the alloc region.
+ */
+ infop->primary = infop->addr;
+ infop->head = (u_int8_t *)infop->addr + sizeof(REGENV);
+ renv = infop->primary;
+
+ /*
+ * Make sure the region matches our build. Special case a region
+ * that's all nul bytes, just treat it like any other corruption.
+ */
+ if (renv->majver != DB_VERSION_MAJOR ||
+ renv->minver != DB_VERSION_MINOR) {
+ if (renv->majver != 0 || renv->minver != 0) {
+ __db_errx(env, DB_STR_A("1538",
+ "Program version %d.%d doesn't match environment version %d.%d",
+ "%d %d %d %d"), DB_VERSION_MAJOR, DB_VERSION_MINOR,
+ renv->majver, renv->minver);
+ ret = DB_VERSION_MISMATCH;
+ } else
+ ret = EINVAL;
+ goto err;
+ }
+ if (renv->signature != signature) {
+ __db_errx(env, DB_STR("1539",
+ "Build signature doesn't match environment"));
+ ret = DB_VERSION_MISMATCH;
+ goto err;
+ }
+
+ /*
+ * Check if the environment has had a catastrophic failure.
+ *
+ * Check the magic number to ensure the region is initialized. If the
+ * magic number isn't set, the lock may not have been initialized, and
+ * an attempt to use it could lead to random behavior.
+ *
+ * The panic and magic values aren't protected by any lock, so we never
+ * use them in any check that's more complex than set/not-set.
+ *
+ * !!!
+ * I'd rather play permissions games using the underlying file, but I
+ * can't because Windows/NT filesystems won't open files mode 0.
+ */
+ if (renv->panic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) {
+ ret = __env_panic_msg(env);
+ goto err;
+ }
+ if (renv->magic != DB_REGION_MAGIC)
+ goto retry;
+
+ /*
+ * Get a reference to the underlying REGION information for this
+ * environment.
+ */
+ if ((ret = __env_des_get(env, infop, infop, &rp)) != 0 || rp == NULL)
+ goto find_err;
+ infop->rp = rp;
+
+ /*
+ * There's still a possibility for inconsistent data. When we acquired
+ * the size of the region and attached to it, it might have still been
+ * growing as part of its creation. We can detect this by checking the
+ * size we originally found against the region's current size. (The
+ * region's current size has to be final, the creator finished growing
+ * it before setting the magic number in the region.)
+ *
+ * !!!
+ * Skip this test when the application specified its own map functions.
+ * The size of the region is essentially unknown in that case: some
+ * other process asked the application's map function for some bytes,
+ * but we were never told the final size of the region. We could get
+ * a size back from the map function, but for all we know, our process'
+ * map function only knows how to join regions, it has no clue how big
+ * those regions are.
+ */
+ if (DB_GLOBAL(j_region_map) == NULL && rp->size != size)
+ goto retry;
+
+ /*
+ * Check our caller's configuration flags; it's an error to configure
+ * incompatible or additional subsystems in an existing environment.
+ * Return the total set of flags to the caller so they initialize the
+ * correct set of subsystems.
+ */
+ if (init_flagsp != NULL) {
+ FLD_CLR(*init_flagsp, renv->init_flags);
+ if (*init_flagsp != 0) {
+ __db_errx(env, DB_STR("1540",
+ "configured environment flags incompatible with existing environment"));
+ ret = EINVAL;
+ goto err;
+ }
+ *init_flagsp = renv->init_flags;
+ }
+
+ /*
+ * Fault the pages into memory. Note, do this AFTER releasing the
+ * lock, because we're only reading the pages, not writing them.
+ */
+ (void)__env_faultmem(env, infop->primary, rp->size, 0);
+
+ /* Everything looks good, we're done. */
+ env->reginfo = infop;
+ return (0);
+
+creation:
+ /* Create the environment region. */
+ F_SET(infop, REGION_CREATE);
+
+ /*
+ * Allocate room for REGION structures plus overhead.
+ */
+ memset(&tregion, 0, sizeof(tregion));
+ nregions = __memp_max_regions(env) + 5;
+ size = nregions * sizeof(REGION);
+ size += dbenv->passwd_len;
+ size += (dbenv->thr_max + dbenv->thr_max / 4) *
+ __env_alloc_size(sizeof(DB_THREAD_INFO));
+ /* Space for replication buffer. */
+ if (init_flagsp != NULL && FLD_ISSET(*init_flagsp, DB_INITENV_REP))
+ size += MEGABYTE;
+ size += __txn_region_size(env);
+ size += __log_region_size(env);
+ size += __env_thread_size(env, size);
+ size += __lock_region_size(env, size);
+
+ tregion.size = (roff_t)size;
+ tregion.segid = INVALID_REGION_SEGID;
+
+ if ((tregion.max = dbenv->memory_max) == 0) {
+ /* Add some slop. */
+ size += 16 * 1024;
+ tregion.max = (roff_t)size;
+
+ tregion.max += (roff_t)__lock_region_max(env);
+ tregion.max += (roff_t)__txn_region_max(env);
+ tregion.max += (roff_t)__log_region_max(env);
+ tregion.max += (roff_t)__env_thread_max(env);
+ } else if (tregion.size > tregion.max) {
+ __db_errx(env, DB_STR_A("1542",
+ "Minimum environment memory size %ld is bigger than spcified max %ld.",
+ "%ld %ld"), (u_long)tregion.size, (u_long)tregion.max);
+ ret = EINVAL;
+ goto err;
+ } else if (F_ISSET(env, ENV_PRIVATE))
+ infop->max_alloc = dbenv->memory_max;
+
+ if ((ret = __env_sys_attach(env, infop, &tregion)) != 0)
+ goto err;
+
+ /*
+ * If the application has specified its own mapping functions, we don't
+ * know until we get here if we are creating the region or not. We
+ * find out when the underlying functions clear the REGION_CREATE flag.
+ */
+ if (!F_ISSET(infop, REGION_CREATE))
+ goto user_map_functions;
+
+ /*
+ * Fault the pages into memory. Note, do this BEFORE we initialize
+ * anything, because we're writing the pages, not just reading them.
+ */
+ (void)__env_faultmem(env, infop->addr, tregion.size, 1);
+
+ /*
+ * The first object in the region is the REGENV structure. This is
+ * different from the other regions, and, from everything else in
+ * this region, where all objects are allocated from the pool, i.e.,
+ * there aren't any fixed locations. The remaining space is made
+ * available for later allocation.
+ *
+ * The allocation space must be size_t aligned, because that's what
+ * the initialization routine is going to store there. To make sure
+ * that happens, the REGENV structure was padded with a final size_t.
+ * No other region needs to worry about it because all of them treat
+ * the entire region as allocation space.
+ *
+ * Set the primary reference and correct the "head" value to reference
+ * the alloc region.
+ */
+ infop->primary = infop->addr;
+ infop->head = (u_int8_t *)infop->addr + sizeof(REGENV);
+ __env_alloc_init(infop, tregion.size - sizeof(REGENV));
+
+ /*
+ * Initialize the rest of the REGENV structure. (Don't set the magic
+ * number to the correct value, that would validate the environment).
+ */
+ renv = infop->primary;
+ renv->magic = 0;
+ renv->panic = 0;
+
+ (void)db_version(&majver, &minver, &patchver);
+ renv->majver = (u_int32_t)majver;
+ renv->minver = (u_int32_t)minver;
+ renv->patchver = (u_int32_t)patchver;
+ renv->signature = signature;
+
+ (void)time(&renv->timestamp);
+ __os_unique_id(env, &renv->envid);
+
+ /*
+ * Initialize init_flags to store the flags that any other environment
+ * handle that uses DB_JOINENV to join this environment will need.
+ */
+ renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp;
+
+ /*
+ * Set up the region array. We use an array rather than a linked list
+ * as we have to traverse this list after failure in some cases, and
+ * we don't want to infinitely loop should the application fail while
+ * we're manipulating the list.
+ */
+ renv->region_cnt = nregions;
+ if ((ret = __env_alloc(infop, nregions * sizeof(REGION), &rp)) != 0) {
+ __db_err(env, ret, DB_STR("1543",
+ "unable to create new master region array"));
+ goto err;
+ }
+ renv->region_off = R_OFFSET(infop, rp);
+ for (i = 0; i < nregions; ++i, ++rp)
+ rp->id = INVALID_REGION_ID;
+
+ renv->cipher_off = renv->thread_off = renv->rep_off = INVALID_ROFF;
+ renv->flags = 0;
+ renv->op_timestamp = renv->rep_timestamp = 0;
+ renv->mtx_regenv = MUTEX_INVALID;
+ renv->reg_panic = 0;
+
+ /*
+ * Get the underlying REGION structure for this environment. Note,
+ * we created the underlying OS region before we acquired the REGION
+ * structure, which is backwards from the normal procedure. Update
+ * the REGION structure.
+ */
+ if ((ret = __env_des_get(env, infop, infop, &rp)) != 0) {
+find_err: __db_errx(env, DB_STR_A("1544",
+ "%s: unable to find environment", "%s"), infop->name);
+ if (ret == 0)
+ ret = EINVAL;
+ goto err;
+ }
+ infop->rp = rp;
+ rp->alloc = rp->size = tregion.size;
+ rp->max = tregion.max;
+ rp->segid = tregion.segid;
+
+ /*
+ * !!!
+ * If we create an environment where regions are public and in system
+ * memory, we have to inform processes joining the environment how to
+ * attach to the shared memory segment. So, we write the shared memory
+ * identifier into the file, to be read by those other processes.
+ *
+ * XXX
+ * This is really OS-layer information, but I can't see any easy way
+ * to move it down there without passing down information that it has
+ * no right to know, e.g., that this is the one-and-only REGENV region
+ * and not some other random region.
+ */
+ if (tregion.segid != INVALID_REGION_SEGID) {
+ ref.size = tregion.size;
+ ref.segid = tregion.segid;
+ ref.max = tregion.max;
+ if ((ret = __os_write(
+ env, env->lockfhp, &ref, sizeof(ref), &nrw)) != 0) {
+ __db_err(env, ret, DB_STR_A("1545",
+ "%s: unable to write out public environment ID",
+ "%s"), infop->name);
+ goto err;
+ }
+ }
+
+#ifndef HAVE_MUTEX_FCNTL
+ /*
+ * If we're not doing fcntl locking, we can close the file handle. We
+ * no longer need it and the less contact between the buffer cache and
+ * the VM, the better.
+ */
+ if (env->lockfhp != NULL) {
+ (void)__os_closehandle(env, env->lockfhp);
+ env->lockfhp = NULL;
+ }
+#endif
+
+ /* Everything looks good, we're done. */
+ env->reginfo = infop;
+ return (0);
+
+err:
+retry: /* Close any open file handle. */
+ if (env->lockfhp != NULL) {
+ (void)__os_closehandle(env, env->lockfhp);
+ env->lockfhp = NULL;
+ }
+
+ /*
+ * If we joined or created the region, detach from it. If we created
+ * it, destroy it. Note, there's a path in the above code where we're
+ * using a temporary REGION structure because we haven't yet allocated
+ * the real one. In that case the region address (addr) will be filled
+ * in, but the REGION pointer (rp) won't. Fix it.
+ */
+ if (infop->addr != NULL) {
+ if (infop->rp == NULL)
+ infop->rp = &tregion;
+
+ (void)__env_sys_detach(env,
+ infop, F_ISSET(infop, REGION_CREATE));
+
+ if (rp != NULL && F_ISSET(env, DB_PRIVATE))
+ __env_alloc_free(infop, rp);
+ }
+
+ /* Free the allocated name and/or REGINFO structure. */
+ if (infop->name != NULL)
+ __os_free(env, infop->name);
+ __os_free(env, infop);
+
+ /* If we had a temporary error, wait awhile and try again. */
+ if (ret == 0) {
+ if (!retry_ok || ++retry_cnt > 3) {
+ __db_errx(env, DB_STR("1546",
+ "unable to join the environment"));
+ ret = EAGAIN;
+ } else {
+ __os_yield(env, retry_cnt * 3, 0);
+ goto loop;
+ }
+ }
+
+ return (ret);
+}
+
+/*
+ * __env_turn_on --
+ * Turn on the created environment.
+ *
+ * PUBLIC: int __env_turn_on __P((ENV *));
+ */
+int
+__env_turn_on(env)
+ ENV *env;
+{
+ REGENV *renv;
+ REGINFO *infop;
+
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ /* If we didn't create the region, there's no need for further work. */
+ if (!F_ISSET(infop, REGION_CREATE))
+ return (0);
+
+ /*
+ * Validate the file. All other threads of control are waiting
+ * on this value to be written -- "Let slip the hounds of war!"
+ */
+ renv->magic = DB_REGION_MAGIC;
+
+ return (0);
+}
+
+/*
+ * __env_turn_off --
+ * Turn off the environment.
+ *
+ * PUBLIC: int __env_turn_off __P((ENV *, u_int32_t));
+ */
+int
+__env_turn_off(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ REGENV *renv;
+ REGINFO *infop;
+ int ret, t_ret;
+
+ ret = 0;
+
+ /*
+ * Connect to the environment: If we can't join the environment, we
+ * guess it's because it doesn't exist and we're done.
+ *
+ * If the environment exists, attach and lock the environment.
+ */
+ if (__env_attach(env, NULL, 0, 1) != 0)
+ return (0);
+
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ MUTEX_LOCK(env, renv->mtx_regenv);
+
+ /*
+ * If the environment is in use, we're done unless we're forcing the
+ * issue or the environment has panic'd. (If the environment panic'd,
+ * the thread holding the reference count may not have cleaned up, so
+ * we clean up. It's possible the application didn't plan on removing
+ * the environment in this particular call, but panic'd environments
+ * aren't useful to anyone.)
+ *
+ * Otherwise, panic the environment and overwrite the magic number so
+ * any thread of control attempting to connect (or racing with us) will
+ * back off and retry, or just die.
+ */
+ if (renv->refcnt > 0 && !LF_ISSET(DB_FORCE) && !renv->panic)
+ ret = EBUSY;
+ else
+ renv->panic = 1;
+
+ /*
+ * Unlock the environment (nobody should need this lock because
+ * we've poisoned the pool) and detach from the environment.
+ */
+ MUTEX_UNLOCK(env, renv->mtx_regenv);
+
+ if ((t_ret = __env_detach(env, 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __env_panic_set --
+ * Set/clear unrecoverable error.
+ *
+ * PUBLIC: void __env_panic_set __P((ENV *, int));
+ */
+void
+__env_panic_set(env, on)
+ ENV *env;
+ int on;
+{
+ if (env != NULL && env->reginfo != NULL)
+ ((REGENV *)env->reginfo->primary)->panic = on ? 1 : 0;
+}
+
+/*
+ * __env_ref_increment --
+ * Increment the environment's reference count.
+ *
+ * PUBLIC: int __env_ref_increment __P((ENV *));
+ */
+int
+__env_ref_increment(env)
+ ENV *env;
+{
+ REGENV *renv;
+ REGINFO *infop;
+ int ret;
+
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ /* If we're creating the primary region, allocate a mutex. */
+ if (F_ISSET(infop, REGION_CREATE)) {
+ if ((ret = __mutex_alloc(
+ env, MTX_ENV_REGION, 0, &renv->mtx_regenv)) != 0)
+ return (ret);
+ renv->refcnt = 1;
+ } else {
+ /* Lock the environment, increment the reference, unlock. */
+ MUTEX_LOCK(env, renv->mtx_regenv);
+ ++renv->refcnt;
+ MUTEX_UNLOCK(env, renv->mtx_regenv);
+ }
+
+ F_SET(env, ENV_REF_COUNTED);
+ return (0);
+}
+
+/*
+ * __env_ref_decrement --
+ * Decrement the environment's reference count.
+ *
+ * PUBLIC: int __env_ref_decrement __P((ENV *));
+ */
+int
+__env_ref_decrement(env)
+ ENV *env;
+{
+ REGENV *renv;
+ REGINFO *infop;
+
+ /* Be cautious -- we may not have an environment. */
+ if ((infop = env->reginfo) == NULL)
+ return (0);
+
+ renv = infop->primary;
+
+ /* Even if we have an environment, we may not have reference counted it. */
+ if (F_ISSET(env, ENV_REF_COUNTED)) {
+ /* Lock the environment, decrement the reference, unlock. */
+ MUTEX_LOCK(env, renv->mtx_regenv);
+ if (renv->refcnt == 0)
+ __db_errx(env, DB_STR("1547",
+ "environment reference count went negative"));
+ else
+ --renv->refcnt;
+ MUTEX_UNLOCK(env, renv->mtx_regenv);
+
+ F_CLR(env, ENV_REF_COUNTED);
+ }
+
+ /* If a private environment, we're done with the mutex, destroy it. */
+ return (F_ISSET(env, ENV_PRIVATE) ?
+ __mutex_free(env, &renv->mtx_regenv) : 0);
+}
+
+/*
+ * __env_ref_get --
+ * Get the number of environment references. This is an unprotected
+ * read of refcnt to simply provide a spot check of the value. It
+ * is only intended for use as an internal utility routine.
+ *
+ * PUBLIC: int __env_ref_get __P((DB_ENV *, u_int32_t *));
+ */
+int
+__env_ref_get(dbenv, countp)
+ DB_ENV *dbenv;
+ u_int32_t *countp;
+{
+ ENV *env;
+ REGENV *renv;
+ REGINFO *infop;
+
+ env = dbenv->env;
+ infop = env->reginfo;
+ renv = infop->primary;
+ *countp = renv->refcnt;
+ return (0);
+}
+
+/*
+ * __env_detach --
+ * Detach from the environment.
+ *
+ * PUBLIC: int __env_detach __P((ENV *, int));
+ */
+int
+__env_detach(env, destroy)
+ ENV *env;
+ int destroy;
+{
+ REGENV *renv;
+ REGINFO *infop;
+ REGION rp;
+ int ret, t_ret;
+
+ infop = env->reginfo;
+ renv = infop->primary;
+ ret = 0;
+
+ /* Close the locking file handle. */
+ if (env->lockfhp != NULL) {
+ if ((t_ret =
+ __os_closehandle(env, env->lockfhp)) != 0 && ret == 0)
+ ret = t_ret;
+ env->lockfhp = NULL;
+ }
+
+ /*
+ * If a private region, return the memory to the heap. Not needed for
+ * filesystem-backed or system shared memory regions, that memory isn't
+ * owned by any particular process.
+ */
+ if (destroy) {
+ /*
+ * Free the REGION array.
+ *
+ * The actual underlying region structure is allocated from the
+ * primary shared region, and we're about to free it. Save a
+ * copy on our stack for the REGINFO to reference when it calls
+ * down into the OS layer to release the shared memory segment.
+ */
+ rp = *infop->rp;
+ infop->rp = &rp;
+
+ if (renv->region_off != INVALID_ROFF)
+ __env_alloc_free(
+ infop, R_ADDR(infop, renv->region_off));
+ }
+
+ /*
+ * Set the ENV->reginfo field to NULL. BDB uses the ENV->reginfo
+ * field to decide if the underlying region can be accessed or needs
+ * cleanup. We're about to destroy what it references, so it needs to
+ * be cleared.
+ */
+ env->reginfo = NULL;
+ env->thr_hashtab = NULL;
+
+ if ((t_ret = __env_sys_detach(env, infop, destroy)) != 0 && ret == 0)
+ ret = t_ret;
+ if (infop->name != NULL)
+ __os_free(env, infop->name);
+
+ /* Discard the ENV->reginfo field's memory. */
+ __os_free(env, infop);
+
+ return (ret);
+}
+
+/*
+ * __env_remove_env --
+ * Remove an environment.
+ *
+ * PUBLIC: int __env_remove_env __P((ENV *));
+ */
+int
+__env_remove_env(env)
+ ENV *env;
+{
+ DB_ENV *dbenv;
+ REGENV *renv;
+ REGINFO *infop, reginfo;
+ REGION *rp;
+ u_int32_t flags_orig, i;
+
+ dbenv = env->dbenv;
+
+ /*
+ * We do not want to hang on a mutex request, nor do we care about
+ * panics.
+ */
+ flags_orig = F_ISSET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC);
+ F_SET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC);
+
+ /*
+ * This routine has to walk a nasty line between not looking into the
+ * environment (which may be corrupted after an app or system crash),
+ * and removing everything that needs removing.
+ *
+ * Connect to the environment: If we can't join the environment, we
+ * guess it's because it doesn't exist. Remove the underlying files,
+ * at least.
+ */
+ if (__env_attach(env, NULL, 0, 0) != 0)
+ goto remfiles;
+
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ /*
+ * Kill the environment, if it's not already dead.
+ */
+ renv->panic = 1;
+
+ /*
+ * Walk the array of regions. Connect to each region and disconnect
+ * with the destroy flag set. This shouldn't cause any problems, even
+ * if the region is corrupted, because we never look inside the region
+ * (with the single exception of mutex regions on systems where we have
+ * to return resources to the underlying system).
+ */
+ for (rp = R_ADDR(infop, renv->region_off),
+ i = 0; i < renv->region_cnt; ++i, ++rp) {
+ if (rp->id == INVALID_REGION_ID || rp->type == REGION_TYPE_ENV)
+ continue;
+ /*
+ * !!!
+ * The REGION_CREATE_OK flag is set for Windows/95 -- regions
+ * are zero'd out when the last reference to the region goes
+ * away, in which case the underlying OS region code requires
+ * callers be prepared to create the region in order to join it.
+ */
+ memset(&reginfo, 0, sizeof(reginfo));
+ reginfo.id = rp->id;
+ reginfo.flags = REGION_CREATE_OK;
+
+ /*
+ * If we get here and can't attach and/or detach to the
+ * region, it's a mess. Ignore errors, there's nothing
+ * we can do about them.
+ */
+ if (__env_region_attach(env, &reginfo, 0, 0) != 0)
+ continue;
+
+#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
+ /*
+ * If destroying the mutex region, return any system
+ * resources to the system.
+ */
+ if (reginfo.type == REGION_TYPE_MUTEX)
+ __mutex_resource_return(env, &reginfo);
+#endif
+ (void)__env_region_detach(env, &reginfo, 1);
+ }
+
+ /* Detach from the environment's primary region. */
+ (void)__env_detach(env, 1);
+
+remfiles:
+ /*
+ * Walk the list of files in the directory, unlinking files in the
+ * Berkeley DB name space.
+ */
+ __env_remove_file(env);
+
+ F_CLR(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC);
+ F_SET(dbenv, flags_orig);
+
+ return (0);
+}
+
+/*
+ * __env_remove_file --
+ * Discard any region files in the filesystem.
+ */
+static void
+__env_remove_file(env)
+ ENV *env;
+{
+ int cnt, fcnt, lastrm, ret;
+ const char *dir;
+ char saved_char, *p, **names, *path, buf[sizeof(DB_REGION_FMT) + 20];
+
+ /* Get the full path of a file in the environment. */
+ (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
+ if ((ret = __db_appname(env,
+ DB_APP_NONE, buf, NULL, &path)) != 0)
+ return;
+
+ /* Get the parent directory for the environment. */
+ if ((p = __db_rpath(path)) == NULL) {
+ p = path;
+ saved_char = *p;
+
+ dir = PATH_DOT;
+ } else {
+ saved_char = *p;
+ *p = '\0';
+
+ dir = path;
+ }
+
+ /* Get the list of file names. */
+ if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0)
+ __db_err(env, ret, "%s", dir);
+
+ /* Restore the path, and free it. */
+ *p = saved_char;
+ __os_free(env, path);
+
+ if (ret != 0)
+ return;
+
+ /*
+ * Remove files from the region directory.
+ */
+ for (lastrm = -1, cnt = fcnt; --cnt >= 0;) {
+ /* Skip anything outside our name space. */
+ if (!IS_DB_FILE(names[cnt]))
+ continue;
+
+ /* Skip queue extent files. */
+ if (strncmp(names[cnt], "__dbq.", 6) == 0)
+ continue;
+ if (strncmp(names[cnt], "__dbp.", 6) == 0)
+ continue;
+
+ /* Skip registry files. */
+ if (strncmp(names[cnt], "__db.register", 13) == 0)
+ continue;
+
+ /* Skip replication files. */
+ if (strncmp(names[cnt], "__db.rep", 8) == 0)
+ continue;
+
+ /*
+ * Remove the primary environment region last, because it's
+ * the key to this whole mess.
+ */
+ if (strcmp(names[cnt], DB_REGION_ENV) == 0) {
+ lastrm = cnt;
+ continue;
+ }
+
+ /* Remove the file. */
+ if (__db_appname(env,
+ DB_APP_NONE, names[cnt], NULL, &path) == 0) {
+ /*
+ * Overwrite region files. Temporary files would have
+ * been maintained in encrypted format, so there's no
+ * reason to overwrite them. This is not an exact
+ * check on the file being a region file, but it's
+ * not likely to be wrong, and the worst thing that can
+ * happen is we overwrite a file that didn't need to be
+ * overwritten.
+ */
+ (void)__os_unlink(env, path, 1);
+ __os_free(env, path);
+ }
+ }
+
+ if (lastrm != -1)
+ if (__db_appname(env,
+ DB_APP_NONE, names[lastrm], NULL, &path) == 0) {
+ (void)__os_unlink(env, path, 1);
+ __os_free(env, path);
+ }
+ __os_dirfree(env, names, fcnt);
+}
+
+/*
+ * __env_region_attach
+ * Join/create a region.
+ *
+ * PUBLIC: int __env_region_attach __P((ENV *, REGINFO *, size_t, size_t));
+ */
+int
+__env_region_attach(env, infop, init, max)
+ ENV *env;
+ REGINFO *infop;
+ size_t init, max;
+{
+ REGION *rp;
+ int ret;
+ char buf[sizeof(DB_REGION_FMT) + 20];
+
+ /*
+ * Find or create a REGION structure for this region. If we create
+ * it, the REGION_CREATE flag will be set in the infop structure.
+ */
+ F_CLR(infop, REGION_CREATE);
+ if ((ret = __env_des_get(env, env->reginfo, infop, &rp)) != 0)
+ return (ret);
+ infop->env = env;
+ infop->rp = rp;
+ infop->type = rp->type;
+ infop->id = rp->id;
+
+ /*
+ * __env_des_get may have created the region and set the create
+ * flag. If we're creating the region, set the desired size.
+ */
+ if (F_ISSET(infop, REGION_CREATE)) {
+ rp->alloc = rp->size = (roff_t)init;
+ rp->max = (roff_t)max;
+ }
+
+ /* Join/create the underlying region. */
+ (void)snprintf(buf, sizeof(buf), DB_REGION_FMT, infop->id);
+ if ((ret = __db_appname(env,
+ DB_APP_NONE, buf, NULL, &infop->name)) != 0)
+ goto err;
+ if ((ret = __env_sys_attach(env, infop, rp)) != 0)
+ goto err;
+
+ /*
+ * Fault the pages into memory. Note, do this BEFORE we initialize
+ * anything because we're writing pages in created regions, not just
+ * reading them.
+ */
+ (void)__env_faultmem(env,
+ infop->addr, rp->size, F_ISSET(infop, REGION_CREATE));
+
+ /*
+ * !!!
+ * The underlying layer may have just decided that we are going
+ * to create the region. There are various system issues that
+ * can result in a useless region that requires re-initialization.
+ *
+ * If we created the region, initialize it for allocation.
+ */
+ if (F_ISSET(infop, REGION_CREATE))
+ __env_alloc_init(infop, rp->size);
+
+ return (0);
+
+err: /* Discard the underlying region. */
+ if (infop->addr != NULL)
+ (void)__env_sys_detach(env,
+ infop, F_ISSET(infop, REGION_CREATE));
+ else if (infop->name != NULL) {
+ __os_free(env, infop->name);
+ infop->name = NULL;
+ }
+ infop->rp = NULL;
+ infop->id = INVALID_REGION_ID;
+
+ /* Discard the REGION structure if we created it. */
+ if (F_ISSET(infop, REGION_CREATE)) {
+ __env_des_destroy(env, rp);
+ F_CLR(infop, REGION_CREATE);
+ }
+
+ return (ret);
+}
+
+/*
+ * __env_region_share
+ * Share the primary region.
+ *
+ * PUBLIC: int __env_region_share __P((ENV *, REGINFO *));
+ */
+int
+__env_region_share(env, infop)
+ ENV *env;
+ REGINFO *infop;
+{
+ REGINFO *envinfo;
+ REGION *rp;
+
+ envinfo = env->reginfo;
+ rp = envinfo->rp;
+ F_SET(infop, F_ISSET(envinfo, REGION_CREATE) | REGION_SHARED);
+ infop->addr = envinfo->addr;
+ infop->head = envinfo->head;
+
+ infop->env = env;
+ infop->rp = rp;
+ infop->name = envinfo->name;
+ infop->fhp = envinfo->fhp;
+ infop->type = rp->type;
+ infop->id = rp->id;
+
+ return (0);
+}
+
+/*
+ * __env_region_detach --
+ * Detach from a region.
+ *
+ * PUBLIC: int __env_region_detach __P((ENV *, REGINFO *, int));
+ */
+int
+__env_region_detach(env, infop, destroy)
+ ENV *env;
+ REGINFO *infop;
+ int destroy;
+{
+ REGION *rp;
+ REGION_MEM *mem, *next;
+ int ret;
+
+ if (F_ISSET(env, ENV_PRIVATE))
+ destroy = 1;
+ else if (F_ISSET(infop, REGION_SHARED))
+ return (0);
+
+ rp = infop->rp;
+
+ /*
+ * When discarding the regions as we shut down a database environment,
+ * discard any allocated shared memory segments. This is the last time
+ * we use them, and db_region_destroy is the last region-specific call
+ * we make.
+ */
+ if (F_ISSET(env, ENV_PRIVATE) && infop->primary != NULL) {
+ for (mem = infop->mem; mem != NULL; mem = next) {
+ next = mem->next;
+ __env_alloc_free(infop, mem);
+ }
+ __env_alloc_free(infop, infop->primary);
+ }
+
+ if (F_ISSET(infop, REGION_SHARED))
+ return (0);
+
+ /* Detach from the underlying OS region. */
+ ret = __env_sys_detach(env, infop, destroy);
+
+ /* If we destroyed the region, discard the REGION structure. */
+ if (destroy)
+ __env_des_destroy(env, rp);
+
+ /* Destroy the structure. */
+ if (infop->name != NULL)
+ __os_free(env, infop->name);
+
+ return (ret);
+}
+
+/*
+ * __env_sys_attach --
+ * Prep and call the underlying OS attach function.
+ */
+static int
+__env_sys_attach(env, infop, rp)
+ ENV *env;
+ REGINFO *infop;
+ REGION *rp;
+{
+ int ret;
+
+ /*
+ * All regions are created on 8K boundaries out of sheer paranoia,
+ * so we don't make some underlying VM unhappy. Make sure we don't
+ * overflow or underflow.
+ */
+#define OS_VMPAGESIZE (8 * 1024)
+#define OS_VMROUNDOFF(i) { \
+ if ((i) + OS_VMPAGESIZE - 1 > (i)) \
+ (i) += OS_VMPAGESIZE - 1; \
+ (i) -= (i) % OS_VMPAGESIZE; \
+}
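+ /*
+ * For example (illustrative numbers only): OS_VMROUNDOFF(12000) yields
+ * 16384 -- 12000 + 8191 == 20191, and subtracting 20191 % 8192 == 3807
+ * leaves two full 8KB pages -- while a value that is already a multiple
+ * of 8KB is unchanged, and a value so large that the addition would
+ * wrap skips the addition and is rounded down instead.
+ */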
+ if (F_ISSET(infop, REGION_CREATE)) {
+ OS_VMROUNDOFF(rp->size);
+ OS_VMROUNDOFF(rp->max);
+ }
+
+#ifdef DB_REGIONSIZE_MAX
+ /* Some architectures have hard limits on the maximum region size. */
+ if (rp->size > DB_REGIONSIZE_MAX) {
+ __db_errx(env, DB_STR_A("1548",
+ "region size %lu is too large; maximum is %lu", "%lu %lu"),
+ (u_long)rp->size, (u_long)DB_REGIONSIZE_MAX);
+ return (EINVAL);
+ }
+ if (rp->max > DB_REGIONSIZE_MAX) {
+ __db_errx(env, DB_STR_A("1549",
+ "region max %lu is too large; maximum is %lu", "%lu %lu"),
+ (u_long)rp->max, (u_long)DB_REGIONSIZE_MAX);
+ return (EINVAL);
+ }
+#endif
+
+ /*
+ * If a region is private, malloc the memory.
+ *
+ * !!!
+ * If this fails because the region is too large to malloc, mmap(2)
+ * using the MAP_ANON or MAP_ANONYMOUS flags would be an alternative.
+ * I don't know of any architectures (yet!) where malloc is a problem.
+ */
+ if (F_ISSET(env, ENV_PRIVATE)) {
+#if defined(HAVE_MUTEX_HPPA_MSEM_INIT)
+ /*
+ * !!!
+ * There exist spinlocks that don't work in malloc memory, e.g.,
+ * the HP/UX msemaphore interface. If we don't have locks that
+ * will work in malloc memory, we better not be private or not
+ * be threaded.
+ */
+ if (F_ISSET(env, ENV_THREAD)) {
+ __db_errx(env, DB_STR("1550",
+"architecture does not support locks inside process-local (malloc) memory"));
+ __db_errx(env, DB_STR("1551",
+ "application may not specify both DB_PRIVATE and DB_THREAD"));
+ return (EINVAL);
+ }
+#endif
+ if ((ret = __os_malloc(
+ env, sizeof(REGENV), &infop->addr)) != 0)
+ return (ret);
+
+ } else {
+#if !defined(HAVE_MMAP_EXTEND)
+ /* Extend any disk file to its full size before mapping it. */
+ rp->size = rp->max;
+#endif
+ if ((ret = __os_attach(env, infop, rp)) != 0)
+ return (ret);
+ }
+
+ /* Set the start of the allocation region. */
+ infop->head = infop->addr;
+
+ /*
+ * We require that the memory is aligned to fit the largest integral
+ * type. Otherwise, multiple processes mapping the same shared region
+ * would have to memcpy every value before reading it.
+ */
+ if (infop->addr != ALIGNP_INC(infop->addr, sizeof(uintmax_t))) {
+ __db_errx(env, DB_STR("1552",
+ "region memory was not correctly aligned"));
+ (void)__env_sys_detach(env, infop,
+ F_ISSET(infop, REGION_CREATE));
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * __env_sys_detach --
+ * Prep and call the underlying OS detach function.
+ */
+static int
+__env_sys_detach(env, infop, destroy)
+ ENV *env;
+ REGINFO *infop;
+ int destroy;
+{
+
+ /* If a region is private, free the memory. */
+ if (F_ISSET(env, ENV_PRIVATE)) {
+ __os_free(env, infop->addr);
+ return (0);
+ }
+
+ return (__os_detach(env, infop, destroy));
+}
+
+/*
+ * __env_des_get --
+ * Return a reference to the shared information for a REGION,
+ * optionally creating a new entry.
+ */
+static int
+__env_des_get(env, env_infop, infop, rpp)
+ ENV *env;
+ REGINFO *env_infop, *infop;
+ REGION **rpp;
+{
+ REGENV *renv;
+ REGION *rp, *empty_slot, *first_type;
+ u_int32_t i, maxid;
+
+ *rpp = NULL;
+ renv = env_infop->primary;
+
+ /*
+ * If the caller wants to join a region, walk through the existing
+ * regions looking for a matching ID (if ID specified) or matching
+ * type (if type specified). If we return based on a matching type,
+ * return the "primary" region, that is, the first region that was
+ * created of this type.
+ *
+ * Track the first empty slot and maximum region ID for new region
+ * allocation.
+ *
+ * MaxID starts at REGION_ID_ENV, the ID of the primary environment.
+ */
+ maxid = REGION_ID_ENV;
+ empty_slot = first_type = NULL;
+ for (rp = R_ADDR(env_infop, renv->region_off),
+ i = 0; i < renv->region_cnt; ++i, ++rp) {
+ if (rp->id == INVALID_REGION_ID) {
+ if (empty_slot == NULL)
+ empty_slot = rp;
+ continue;
+ }
+ if (infop->id != INVALID_REGION_ID) {
+ if (infop->id == rp->id)
+ break;
+ continue;
+ }
+ if (infop->type == rp->type &&
+ F_ISSET(infop, REGION_JOIN_OK) &&
+ (first_type == NULL || first_type->id > rp->id))
+ first_type = rp;
+
+ if (rp->id > maxid)
+ maxid = rp->id;
+ }
+
+ /* If we found a matching ID (or a matching type), return it. */
+ if (i >= renv->region_cnt)
+ rp = first_type;
+ if (rp != NULL) {
+ *rpp = rp;
+ return (0);
+ }
+
+ /*
+ * If we didn't find a region and we don't have permission to create
+ * the region, fail. The caller generates any error message.
+ */
+ if (!F_ISSET(infop, REGION_CREATE_OK))
+ return (ENOENT);
+
+ /*
+ * If we didn't find a region and don't have room to create one,
+ * fail with an error message; there's a sizing problem.
+ */
+ if (empty_slot == NULL) {
+ __db_errx(env, DB_STR("1553",
+ "no room remaining for additional REGIONs"));
+ return (ENOENT);
+ }
+
+ /*
+ * Initialize a REGION structure for the caller. If id was set, use
+ * that value, otherwise we use the next available ID.
+ */
+ memset(empty_slot, 0, sizeof(REGION));
+ empty_slot->segid = INVALID_REGION_SEGID;
+
+ /*
+ * Set the type and ID; if no region ID was specified,
+ * allocate one.
+ */
+ empty_slot->type = infop->type;
+ empty_slot->id = infop->id == INVALID_REGION_ID ? maxid + 1 : infop->id;
+
+ F_SET(infop, REGION_CREATE);
+
+ *rpp = empty_slot;
+ return (0);
+}
+
+/*
+ * __env_des_destroy --
+ * Destroy a reference to a REGION.
+ */
+static void
+__env_des_destroy(env, rp)
+ ENV *env;
+ REGION *rp;
+{
+ COMPQUIET(env, NULL);
+
+ rp->id = INVALID_REGION_ID;
+}
+
+/*
+ * __env_faultmem --
+ * Fault the region into memory.
+ */
+static int
+__env_faultmem(env, addr, size, created)
+ ENV *env;
+ void *addr;
+ size_t size;
+ int created;
+{
+ int ret;
+ u_int8_t *p, *t;
+
+ /* Ignore heap regions. */
+ if (F_ISSET(env, ENV_PRIVATE))
+ return (0);
+
+ /*
+ * It's sometimes significantly faster to page-fault in all of the
+ * region's pages before we run the application, as we see nasty
+ * side-effects when we page-fault while holding various locks, i.e.,
+ * the lock takes a long time to acquire because of the underlying
+ * page fault, and the other threads convoy behind the lock holder.
+ *
+ * If we created the region, we write a non-zero value so that the
+ * system can't cheat. If we're just joining the region, we can
+ * only read the value and try to confuse the compiler sufficiently
+ * that it doesn't figure out that we're never really using it.
+ *
+ * Touch every page (assuming pages are 512B, the smallest VM page
+ * size used in any general purpose processor).
+ */
+ ret = 0;
+ if (F_ISSET(env->dbenv, DB_ENV_REGION_INIT)) {
+ if (created)
+ for (p = addr,
+ t = (u_int8_t *)addr + size; p < t; p += 512)
+ p[0] = 0xdb;
+ else
+ for (p = addr,
+ t = (u_int8_t *)addr + size; p < t; p += 512)
+ ret |= p[0];
+ }
+
+ return (ret);
+}
diff --git a/src/env/env_register.c b/src/env/env_register.c
new file mode 100644
index 00000000..7475444d
--- /dev/null
+++ b/src/env/env_register.c
@@ -0,0 +1,730 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2004, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+#define REGISTER_FILE "__db.register"
+
+#define PID_EMPTY "X                      0\n" /* Unused PID entry */
+#define PID_FMT "%24lu\n" /* PID entry format */
+ /* Unused PID test */
+#define PID_ISEMPTY(p) (memcmp(p, PID_EMPTY, PID_LEN) == 0)
+#define PID_LEN (25) /* PID entry length */
+
+#define REGISTRY_LOCK(env, pos, nowait) \
+ __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 1, nowait)
+#define REGISTRY_UNLOCK(env, pos) \
+ __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 0, 0)
+#define REGISTRY_EXCL_LOCK(env, nowait) \
+ REGISTRY_LOCK(env, 1, nowait)
+#define REGISTRY_EXCL_UNLOCK(env) \
+ REGISTRY_UNLOCK(env, 1)
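+
+/*
+ * Slot locks are taken at byte offset (slot number * PID_LEN), the first
+ * byte of the slot's line; the environment-wide exclusive lock is taken at
+ * byte offset 1, which is never the first byte of a slot, so the two kinds
+ * of locks cannot collide.  The last REGISTRY_LOCK argument is the nowait
+ * flag passed through to __os_fdlock.
+ */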
+
+static int __envreg_add __P((ENV *, int *, u_int32_t));
+static int __envreg_pid_compare __P((const void *, const void *));
+static int __envreg_create_active_pid __P((ENV *, char *));
+
+/*
+ * Support for portable, multi-process database environment locking, based on
+ * the Subversion SR (#11511).
+ *
+ * The registry feature is configured by specifying the DB_REGISTER flag to the
+ * DbEnv.open method. If DB_REGISTER is specified, DB opens the registry file
+ * in the database environment home directory. The registry file is formatted
+ * as follows:
+ *
+ * 12345 # process ID slot 1
+ * X # empty slot
+ * 12346 # process ID slot 2
+ * X # empty slot
+ * 12347 # process ID slot 3
+ * 12348 # process ID slot 4
+ * X 12349 # empty slot
+ * X # empty slot
+ *
+ * All lines are fixed-length. All lines are process ID slots. Empty slots
+ * are marked with leading non-digit characters.
+ *
+ * To modify the file, you get an exclusive lock on the first byte of the file.
+ *
+ * While holding any DbEnv handle, each process has an exclusive lock on the
+ * first byte of a process ID slot. There is a restriction on having more
+ * than one DbEnv handle open at a time, because Berkeley DB uses per-process
+ * locking to implement this feature, that is, a process may never have more
+ * than a single slot locked.
+ *
+ * This work requires that if a process dies or the system crashes, locks held
+ * by the dying processes will be dropped. (We can't use system shared
+ * memory-backed or filesystem-backed locks because they're persistent when a
+ * process dies.) On POSIX systems, we use fcntl(2) locks; on Win32 we have
+ * LockFileEx/UnlockFile, except for Win/9X and Win/ME which have to loop on
+ * Lockfile/UnlockFile.
+ *
+ * We could implement the same solution with flock locking instead of fcntl,
+ * but flock would require a separate file for each process of control (and
+ * probably each DbEnv handle) in the database environment, which is fairly
+ * ugly.
+ *
+ * Whenever a process opens a new DbEnv handle, it walks the registry file and
+ * verifies it CANNOT acquire the lock for any non-empty slot. If a lock for
+ * a non-empty slot is available, we know a process died holding an open handle,
+ * and recovery needs to be run.
+ *
+ * It's possible to get corruption in the registry file. If a write system
+ * call fails after partially completing, there can be corrupted entries in
+ * the registry file, or a partial entry at the end of the file. This is OK.
+ * A corrupted entry will be flagged as a non-empty line during the registry
+ * file walk. Since the line was corrupted by process failure, no process will
+ * hold a lock on the slot, which will lead to recovery being run.
+ *
+ * There can still be processes running in the environment when we recover it,
+ * and, in fact, there can still be processes running in the old environment
+ * after we're up and running in a new one. This is safe because performing
+ * recovery panics (and removes) the existing environment, so the window of
+ * vulnerability is small. Further, we check the panic flag in the DB API
+ * methods, when waking from spinning on a mutex, and whenever we're about to
+ * write to disk. The only window of corruption is if the write check of the
+ * panic were to complete, the region subsequently be recovered, and then the
+ * write continues. That's very, very unlikely to happen. This vulnerability
+ * already exists in Berkeley DB, too; the registry code doesn't make it any
+ * worse than it already is.
+ *
+ * The only way to avoid that window entirely is to ensure that all processes
+ * in the Berkeley DB environment exit before we run recovery. Applications
+ * can do that if they maintain their own process registry outside of Berkeley
+ * DB, but it's a little more difficult to do here. The obvious approach is
+ * to send signals to any process using the database environment as soon as we
+ * decide to run recovery, but there are problems with that approach: we might
+ * not have permission to send signals to the process, the process might have
+ * signal handlers installed, the cookie stored might not be the same as kill's
+ * argument, we may not be able to reliably tell if the process died, and there
+ * are probably other problems. However, if we can send a signal, it reduces
+ * the window, and so we include the code here. To configure it, turn on the
+ * DB_ENVREG_KILL_ALL #define.
+ */
+#define DB_ENVREG_KILL_ALL 0
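+
+/*
+ * The following is an illustrative sketch, not part of Berkeley DB: the
+ * slot-probing check described above, reduced to plain POSIX fcntl(2)
+ * byte-range locks.  The file descriptor, the SLOT_LEN constant and the
+ * helper name are assumptions made for the example only.
+ */
+#if 0
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#define SLOT_LEN 25			/* Mirrors PID_LEN above. */
+
+/*
+ * probe_slot --
+ *	Return 1 if the slot's owner appears to be dead (we could lock its
+ *	first byte), 0 if a live process still holds the lock, -1 on error.
+ */
+static int
+probe_slot(int fd, unsigned int slot)
+{
+	struct flock fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.l_type = F_WRLCK;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = (off_t)slot * SLOT_LEN;	/* First byte of the slot. */
+	fl.l_len = 1;
+
+	if (fcntl(fd, F_SETLK, &fl) == 0) {	/* Got it: the owner is gone. */
+		fl.l_type = F_UNLCK;		/* Drop the probe lock. */
+		(void)fcntl(fd, F_SETLK, &fl);
+		return (1);
+	}
+	return (errno == EACCES || errno == EAGAIN ? 0 : -1);
+}
+#endif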
+
+/*
+ * __envreg_register --
+ * Register an ENV handle.
+ *
+ * PUBLIC: int __envreg_register __P((ENV *, int *, u_int32_t));
+ */
+int
+__envreg_register(env, need_recoveryp, flags)
+ ENV *env;
+ int *need_recoveryp;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ pid_t pid;
+ u_int32_t bytes, mbytes;
+ int ret;
+ char *pp;
+
+ *need_recoveryp = 0;
+
+ dbenv = env->dbenv;
+ dbenv->thread_id(dbenv, &pid, NULL);
+ pp = NULL;
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1524",
+ "%lu: register environment", "%lu"), (u_long)pid);
+
+ /* Build the path name and open the registry file. */
+ if ((ret = __db_appname(env,
+ DB_APP_NONE, REGISTER_FILE, NULL, &pp)) != 0)
+ goto err;
+ if ((ret = __os_open(env, pp, 0,
+ DB_OSO_CREATE, DB_MODE_660, &dbenv->registry)) != 0)
+ goto err;
+
+ /*
+ * Wait for an exclusive lock on the file.
+ *
+ * !!!
+ * We're locking bytes that don't yet exist, but that's OK as far as
+ * I know.
+ */
+ if ((ret = REGISTRY_EXCL_LOCK(env, 0)) != 0)
+ goto err;
+
+ /*
+ * If the file size is 0, initialize the file.
+ *
+ * Run recovery if we create the file; that means we can clean up the
+ * system by removing the registry file and restarting the application.
+ */
+ if ((ret = __os_ioinfo(
+ env, pp, dbenv->registry, &mbytes, &bytes, NULL)) != 0)
+ goto err;
+ if (mbytes == 0 && bytes == 0) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1525",
+ "%lu: creating %s", "%lu %s"), (u_long)pid, pp);
+ *need_recoveryp = 1;
+ }
+
+ /* Register this process. */
+ if ((ret = __envreg_add(env, need_recoveryp, flags)) != 0)
+ goto err;
+
+ /*
+ * Release our exclusive lock if we don't need to run recovery. If
+ * we need to run recovery, ENV->open will call back into register
+ * code once recovery has completed.
+ */
+ if (*need_recoveryp == 0 && (ret = REGISTRY_EXCL_UNLOCK(env)) != 0)
+ goto err;
+
+ if (0) {
+err: *need_recoveryp = 0;
+
+ /*
+ * !!!
+ * Closing the file handle must release all of our locks.
+ */
+ if (dbenv->registry != NULL)
+ (void)__os_closehandle(env, dbenv->registry);
+ dbenv->registry = NULL;
+ }
+
+ if (pp != NULL)
+ __os_free(env, pp);
+
+ return (ret);
+}
+
+/*
+ * __envreg_add --
+ * Add the process' pid to the register.
+ */
+static int
+__envreg_add(env, need_recoveryp, flags)
+ ENV *env;
+ int *need_recoveryp;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ DB_THREAD_INFO *ip;
+ REGENV * renv;
+ REGINFO *infop;
+ pid_t pid;
+ off_t end, pos, dead;
+ size_t nr, nw;
+ u_int lcnt;
+ u_int32_t bytes, mbytes, orig_flags;
+ int need_recovery, ret, t_ret;
+ char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10];
+
+ dbenv = env->dbenv;
+ need_recovery = 0;
+ COMPQUIET(dead, 0);
+ COMPQUIET(p, NULL);
+ ip = NULL;
+
+ /* Get a copy of our process ID. */
+ dbenv->thread_id(dbenv, &pid, NULL);
+ snprintf(pid_buf, sizeof(pid_buf), PID_FMT, (u_long)pid);
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1526",
+ "%lu: adding self to registry", "%lu"), (u_long)pid);
+
+#if DB_ENVREG_KILL_ALL
+ if (0) {
+kill_all: /*
+ * A second pass through the file, this time killing any
+ * processes still running.
+ */
+ if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+ return (ret);
+ }
+#endif
+
+ /*
+ * Read the file. Skip empty slots, and check that a lock is held
+ * for any allocated slots. An allocated slot which we can lock
+ * indicates a process died holding a handle and recovery needs to
+ * be run.
+ */
+ for (lcnt = 0;; ++lcnt) {
+ if ((ret = __os_read(
+ env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+ return (ret);
+ if (nr == 0)
+ break;
+
+ /*
+ * A partial record at the end of the file is possible if a
+ * previously un-registered process was interrupted while
+ * registering.
+ */
+ if (nr != PID_LEN) {
+ need_recovery = 1;
+ break;
+ }
+
+ if (PID_ISEMPTY(buf)) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1527",
+ "%02u: EMPTY", "%02u"), lcnt);
+ continue;
+ }
+
+ /*
+ * !!!
+ * DB_REGISTER is implemented using per-process locking, only
+ * a single ENV handle may be open per process. Enforce
+ * that restriction.
+ */
+ if (memcmp(buf, pid_buf, PID_LEN) == 0) {
+ __db_errx(env, DB_STR("1528",
+"DB_REGISTER limits processes to one open DB_ENV handle per environment"));
+ return (EINVAL);
+ }
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) {
+ for (p = buf; *p == ' ';)
+ ++p;
+ buf[nr - 1] = '\0';
+ }
+
+#if DB_ENVREG_KILL_ALL
+ if (need_recovery) {
+ pid = (pid_t)strtoul(buf, NULL, 10);
+ (void)kill(pid, SIGKILL);
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1529",
+ "%02u: %s: KILLED", "%02u %s"), lcnt, p);
+ continue;
+ }
+#endif
+ pos = (off_t)lcnt * PID_LEN;
+ if (REGISTRY_LOCK(env, pos, 1) == 0) {
+ if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
+ return (ret);
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1530",
+ "%02u: %s: FAILED", "%02u %s"), lcnt, p);
+
+ need_recovery = 1;
+ dead = pos;
+#if DB_ENVREG_KILL_ALL
+ goto kill_all;
+#else
+ break;
+#endif
+ } else
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1531",
+ "%02u: %s: LOCKED", "%02u %s"), lcnt, p);
+ }
+
+ /*
+ * If we have to perform recovery...
+ *
+ * Mark all slots empty. Registry ignores empty slots we can't lock,
+ * so it doesn't matter if any of the processes are in the middle of
+ * exiting Berkeley DB -- they'll discard their lock when they exit.
+ */
+ if (need_recovery) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, "%lu: recovery required", (u_long)pid);
+
+ if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env,
+ "%lu: performing failchk", (u_long)pid);
+
+ if (LF_ISSET(DB_FAILCHK_ISALIVE))
+ if ((ret = __envreg_create_active_pid(
+ env, pid_buf)) != 0)
+ goto sig_proc;
+
+ /* The environment will already exist, so we do not
+ * want DB_CREATE set, nor do we want any recovery at
+ * this point. No need to put values back as flags is
+ * passed in by value. Save original dbenv flags in
+ * case we need to recover/remove existing environment.
+ * Set DB_ENV_FAILCHK before attach to help ensure we
+ * don't block on a mutex held by the dead process.
+ */
+ LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL);
+ orig_flags = dbenv->flags;
+ F_SET(dbenv, DB_ENV_FAILCHK);
+ /* Attach to environment and subsystems. */
+ if ((ret = __env_attach_regions(
+ dbenv, flags, orig_flags, 0)) != 0)
+ goto sig_proc;
+ if ((t_ret =
+ __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 &&
+ ret == 0)
+ ret = t_ret;
+ if ((t_ret =
+ __env_failchk_int(dbenv)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Free active pid array if used. */
+ if (LF_ISSET(DB_FAILCHK_ISALIVE)) {
+ DB_GLOBAL(num_active_pids) = 0;
+ DB_GLOBAL(size_active_pids) = 0;
+ __os_free( env, DB_GLOBAL(active_pids));
+ }
+
+ /* Detach from environment and deregister thread. */
+ if ((t_ret =
+ __env_refresh(dbenv, orig_flags, 0)) != 0 &&
+ ret == 0)
+ ret = t_ret;
+ if (ret == 0) {
+ if ((ret = __os_seek(env, dbenv->registry,
+ 0, 0,(u_int32_t)dead)) != 0 ||
+ (ret = __os_write(env, dbenv->registry,
+ PID_EMPTY, PID_LEN, &nw)) != 0)
+ return (ret);
+ need_recovery = 0;
+ goto add;
+ }
+
+ }
+ /* If we can't attach, then we cannot set DB_REGISTER panic. */
+sig_proc: if (__env_attach(env, NULL, 0, 0) == 0) {
+ infop = env->reginfo;
+ renv = infop->primary;
+ /* Indicate DB_REGISTER panic. Also, set the environment
+ * panic as this is the panic trigger mechanism in
+ * the code that everything looks for.
+ */
+ renv->reg_panic = 1;
+ renv->panic = 1;
+ (void)__env_detach(env, 0);
+ }
+
+ /* Wait for processes to see the panic and leave. */
+ __os_yield(env, 0, dbenv->envreg_timeout);
+
+ /* Figure out how big the file is. */
+ if ((ret = __os_ioinfo(
+ env, NULL, dbenv->registry, &mbytes, &bytes, NULL)) != 0)
+ return (ret);
+ end = (off_t)mbytes * MEGABYTE + bytes;
+
+ /*
+ * Seek to the beginning of the file and overwrite slots to
+ * the end of the file.
+ *
+ * It's possible for there to be a partial entry at the end of
+ * the file if a process died when trying to register. If so,
+ * correct for it and overwrite it as well.
+ */
+ if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+ return (ret);
+ for (lcnt = 0; lcnt < ((u_int)end / PID_LEN +
+ ((u_int)end % PID_LEN == 0 ? 0 : 1)); ++lcnt) {
+
+ if ((ret = __os_read(
+ env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+ return (ret);
+
+ pos = (off_t)lcnt * PID_LEN;
+ /* do not notify on dead process */
+ if (pos != dead) {
+ pid = (pid_t)strtoul(buf, NULL, 10);
+ DB_EVENT(env, DB_EVENT_REG_ALIVE, &pid);
+ }
+
+ if ((ret = __os_seek(env,
+ dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 ||
+ (ret = __os_write(env,
+ dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0)
+ return (ret);
+ }
+ /* wait one last time to get everyone out */
+ __os_yield(env, 0, dbenv->envreg_timeout);
+ }
+
+ /*
+ * Seek to the first process slot and add ourselves to the first empty
+ * slot we can lock.
+ */
+add: if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+ return (ret);
+ for (lcnt = 0;; ++lcnt) {
+ if ((ret = __os_read(
+ env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+ return (ret);
+ if (nr == PID_LEN && !PID_ISEMPTY(buf))
+ continue;
+ pos = (off_t)lcnt * PID_LEN;
+ if (REGISTRY_LOCK(env, pos, 1) == 0) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1532",
+ "%lu: locking slot %02u at offset %lu",
+ "%lu %02u %lu"), (u_long)pid, lcnt,
+ (u_long)pos);
+
+ if ((ret = __os_seek(env,
+ dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 ||
+ (ret = __os_write(env,
+ dbenv->registry, pid_buf, PID_LEN, &nw)) != 0)
+ return (ret);
+ dbenv->registry_off = (u_int32_t)pos;
+ break;
+ }
+ }
+
+ if (need_recovery)
+ *need_recoveryp = 1;
+
+ return (ret);
+}
+
+/*
+ * __envreg_unregister --
+ * Unregister an ENV handle.
+ *
+ * PUBLIC: int __envreg_unregister __P((ENV *, int));
+ */
+int
+__envreg_unregister(env, recovery_failed)
+ ENV *env;
+ int recovery_failed;
+{
+ DB_ENV *dbenv;
+ size_t nw;
+ int ret, t_ret;
+
+ dbenv = env->dbenv;
+ ret = 0;
+
+ /*
+ * If recovery failed, we want to drop our locks and return, but still
+ * make sure any subsequent process doesn't decide everything is just
+ * fine and try to get into the database environment. In the case of
+ * an error, discard our locks, but leave our slot filled-in.
+ */
+ if (recovery_failed)
+ goto err;
+
+ /*
+ * Why isn't an exclusive lock necessary to discard an ENV handle?
+ *
+ * We mark our process ID slot empty before we discard the process slot
+ * lock, and threads of control reviewing the register file ignore any
+ * slots which they can't lock.
+ */
+ if ((ret = __os_seek(env,
+ dbenv->registry, 0, 0, dbenv->registry_off)) != 0 ||
+ (ret = __os_write(
+ env, dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0)
+ goto err;
+
+ /*
+ * !!!
+ * This code assumes that closing the file descriptor discards all
+ * held locks.
+ *
+ * !!!
+ * There is an ordering problem here -- in the case of a process that
+ * failed in recovery, we're unlocking both the exclusive lock and our
+ * slot lock. If the OS unlocked the exclusive lock and then allowed
+ * another thread of control to acquire the exclusive lock before also
+ * releasing our slot lock, we could race. That can't happen, I
+ * don't think.
+ */
+err: if ((t_ret =
+ __os_closehandle(env, dbenv->registry)) != 0 && ret == 0)
+ ret = t_ret;
+
+ dbenv->registry = NULL;
+ return (ret);
+}
+
+/*
+ * __envreg_xunlock --
+ * Discard the exclusive lock held by the ENV handle.
+ *
+ * PUBLIC: int __envreg_xunlock __P((ENV *));
+ */
+int
+__envreg_xunlock(env)
+ ENV *env;
+{
+ DB_ENV *dbenv;
+ pid_t pid;
+ int ret;
+
+ dbenv = env->dbenv;
+ dbenv->thread_id(dbenv, &pid, NULL);
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR_A("1533",
+ "%lu: recovery completed, unlocking", "%lu"), (u_long)pid);
+
+ if ((ret = REGISTRY_EXCL_UNLOCK(env)) == 0)
+ return (ret);
+
+ __db_err(env, ret, DB_STR_A("1534",
+ "%s: exclusive file unlock", "%s"), REGISTER_FILE);
+ return (__env_panic(env, ret));
+}
+
+/*
+ * __envreg_pid_compare --
+ * Compare routine for qsort and bsearch calls.
+ * Returns a negative value if key is less than membr, 0 if they are
+ * equal, and a positive value if key is greater than membr.
+ */
+static int
+__envreg_pid_compare(key, membr)
+ const void *key;
+ const void *membr;
+{
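+ /*
+ * A subtraction comparator assumes the difference of any two pid_t
+ * values fits in an int; a three-way comparison such as
+ * (a > b) - (a < b) would avoid that assumption.
+ */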
+ return ( *(pid_t*)key - *(pid_t*)membr );
+}
+
+/*
+ * __envreg_isalive --
+ * Default isalive function that uses the contents of an array of active
+ * pids, taken from the db_register file, to determine whether a process
+ * is still alive.
+ *
+ * PUBLIC: int __envreg_isalive
+ * PUBLIC: __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
+ */
+int
+__envreg_isalive(dbenv, pid, tid, flags )
+ DB_ENV *dbenv;
+ pid_t pid;
+ db_threadid_t tid;
+ u_int32_t flags;
+{
+ /* We do not care about tid here; initialize it only to quiet lint. */
+ DB_THREADID_INIT(tid);
+
+ /* If flags is not an expected value, return early. */
+ if (!((flags == 0) || (flags == DB_MUTEX_PROCESS_ONLY)))
+ return (EINVAL);
+
+ if (DB_GLOBAL(active_pids) == NULL ||
+ DB_GLOBAL(num_active_pids) == 0 || dbenv == NULL)
+ return (0);
+ /*
+ * bsearch returns a pointer to an entry in active_pids if a match
+ * is found on pid; if no match is found it returns NULL. This
+ * routine returns 1 if a match is found, else 0.
+ */
+ if (bsearch(&pid, DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+ sizeof(pid_t), __envreg_pid_compare))
+ return (1);
+
+ return (0);
+}
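+
+/*
+ * The default above consults the active-pid array; an application can
+ * instead supply its own callback via DB_ENV->set_isalive().  A minimal
+ * sketch of such a callback (not part of this file), probing the process
+ * with kill(pid, 0), which delivers no signal but reports whether the
+ * process exists.  POSIX-only; the function name is an assumption for the
+ * example.
+ */
+#if 0
+#include <sys/types.h>
+#include <errno.h>
+#include <signal.h>
+
+#include <db.h>
+
+static int
+my_isalive(DB_ENV *dbenv, pid_t pid, db_threadid_t tid, u_int32_t flags)
+{
+	/* Only the pid matters for this check. */
+	(void)dbenv;
+	(void)tid;
+	(void)flags;
+
+	/* Alive if the probe succeeds, or fails only for lack of permission. */
+	return (kill(pid, 0) == 0 || errno == EPERM);
+}
+#endif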
+
+/*
+ * __envreg_create_active_pid --
+ * Create the array of pids, doubling its size whenever more room is
+ * needed. Only active pids from the DB_REGISTER file are added.
+ */
+static int
+__envreg_create_active_pid(env, my_pid)
+ ENV *env;
+ char *my_pid;
+{
+ DB_ENV *dbenv;
+ char buf[PID_LEN + 10];
+ int ret;
+ off_t pos;
+ pid_t pid, *tmparray;
+ size_t tmpsize, nr;
+ u_int lcnt;
+
+ dbenv = env->dbenv;
+ pos = 0;
+ ret = 0;
+
+ /*
+ * Walk through the DB_REGISTER file, grabbing the pid entries that are
+ * locked, as those represent processes that are still alive. Ignore
+ * empty slots and those that are unlocked.
+ */
+ if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+ return (ret);
+ for (lcnt = 0;; ++lcnt) {
+ if ((ret = __os_read(
+ env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+ return (ret);
+
+ /* All done if we read nothing or get a partial record. */
+ if (nr == 0 || nr != PID_LEN)
+ break;
+ if (PID_ISEMPTY(buf))
+ continue;
+
+ pos = (off_t)lcnt * PID_LEN;
+ if (REGISTRY_LOCK(env, pos, 1) == 0) {
+ /* Got the lock, so the process died; do not add it to the array. */
+ if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
+ return (ret);
+ } else {
+ /* First, check to make sure we have room in the array. */
+ if (DB_GLOBAL(num_active_pids) + 1 >
+ DB_GLOBAL(size_active_pids)) {
+ tmpsize =
+ DB_GLOBAL(size_active_pids) * sizeof(pid_t);
+
+ /* Start with 512 bytes, then double whenever we must grow. */
+ tmpsize = tmpsize > 0 ? tmpsize * 2 : 512;
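+ /*
+ * Note that tmpsize and size_active_pids are byte counts,
+ * while num_active_pids counts pid_t entries.
+ */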
+ if ((ret = __os_malloc
+ (env, tmpsize, &tmparray )) != 0)
+ return (ret);
+
+ /* If the array already exists, copy it over and free the old one. */
+ if (DB_GLOBAL(active_pids)) {
+ memcpy( tmparray,
+ DB_GLOBAL(active_pids),
+ DB_GLOBAL(num_active_pids) *
+ sizeof(pid_t));
+ __os_free( env, DB_GLOBAL(active_pids));
+ }
+
+ DB_GLOBAL(active_pids) = tmparray;
+ DB_GLOBAL(size_active_pids) = tmpsize;
+
+ /*
+ * The process getting here has not been added
+ * to the DB_REGISTER file yet, so include it
+ * as the first item in the array.
+ */
+ if (DB_GLOBAL(num_active_pids) == 0) {
+ pid = (pid_t)strtoul(my_pid, NULL, 10);
+ DB_GLOBAL(active_pids)
+ [DB_GLOBAL(num_active_pids)++] = pid;
+ }
+ }
+
+ /* insert into array */
+ pid = (pid_t)strtoul(buf, NULL, 10);
+ DB_GLOBAL(active_pids)
+ [DB_GLOBAL(num_active_pids)++] = pid;
+
+ }
+
+ }
+
+ /* Sort the array to allow binary search in the isalive function. */
+ qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+ sizeof(pid_t), __envreg_pid_compare);
+ return (ret);
+}
diff --git a/src/env/env_sig.c b/src/env/env_sig.c
new file mode 100644
index 00000000..6d127f85
--- /dev/null
+++ b/src/env/env_sig.c
@@ -0,0 +1,201 @@
+/*-
+ * DO NOT EDIT: automatically built by dist/s_sig.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_join.h"
+#include "dbinc/db_verify.h"
+#include "dbinc/hash.h"
+#include "dbinc/heap.h"
+#include "dbinc/lock.h"
+#include "dbinc/log_verify.h"
+#include "dbinc/mp.h"
+#include "dbinc/partition.h"
+#include "dbinc/qam.h"
+#include "dbinc/txn.h"
+
+/*
+ * For a pure 32-bit or 64-bit environment, we check all structures and
+ * calculate a signature. For a mixed-size-addressing (compatible)
+ * environment, we only check the structures in shared memory.
+ */
+#ifdef HAVE_MIXED_SIZE_ADDRESSING
+#define __STRUCTURE_COUNT 41
+#else
+#define __STRUCTURE_COUNT (41 + 104)
+#endif
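+
+/*
+ * The following is an illustrative sketch, not part of Berkeley DB: the
+ * signature idea above reduced to a toy example.  Collect the sizes of the
+ * shared structures into an array and hash its bytes; two builds that
+ * disagree on any structure's layout produce different signatures.  The
+ * structures and the FNV-1a hash are stand-ins -- the real code hashes its
+ * structure list with __ham_func5 below.
+ */
+#if 0
+#include <stddef.h>
+#include <stdint.h>
+
+struct shared_a { uint32_t refcnt; uint64_t off; };
+struct shared_b { uint16_t flags; void *addr; };
+
+static uint32_t
+layout_signature(void)
+{
+	const unsigned char *p;
+	unsigned short sizes[2];
+	uint32_t hash;
+	size_t i;
+
+	sizes[0] = (unsigned short)sizeof(struct shared_a);
+	sizes[1] = (unsigned short)sizeof(struct shared_b);
+
+	/* FNV-1a over the raw bytes of the size array. */
+	hash = 2166136261U;
+	for (p = (const unsigned char *)sizes, i = 0; i < sizeof(sizes); ++i, ++p)
+		hash = (hash ^ *p) * 16777619U;
+	return (hash);
+}
+#endif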
+
+/*
+ * __env_struct_sig --
+ * Compute signature of structures.
+ *
+ * PUBLIC: u_int32_t __env_struct_sig __P((void));
+ */
+u_int32_t
+__env_struct_sig()
+{
+ u_short t[__STRUCTURE_COUNT + 5];
+ u_int i;
+
+ i = 0;
+#define __ADD(s) (t[i++] = sizeof(struct s))
+
+#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__db_mutex_stat);
+#endif
+ __ADD(__db_lock_stat);
+ __ADD(__db_lock_hstat);
+ __ADD(__db_lock_pstat);
+ __ADD(__db_ilock);
+ __ADD(__db_lock_u);
+ __ADD(__db_lsn);
+ __ADD(__db_log_stat);
+ __ADD(__db_mpool_stat);
+ __ADD(__db_rep_stat);
+ __ADD(__db_repmgr_stat);
+ __ADD(__db_seq_stat);
+ __ADD(__db_bt_stat);
+ __ADD(__db_h_stat);
+ __ADD(__db_heap_stat);
+ __ADD(__db_qam_stat);
+ __ADD(__db_thread_info);
+ __ADD(__db_lockregion);
+ __ADD(__sh_dbt);
+ __ADD(__db_lockobj);
+ __ADD(__db_locker);
+ __ADD(__db_lockpart);
+ __ADD(__db_lock);
+ __ADD(__log);
+ __ADD(__mpool);
+ __ADD(__db_mpool_fstat_int);
+ __ADD(__mpoolfile);
+ __ADD(__bh);
+#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__db_mutexregion);
+#endif
+#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__db_mutex_t);
+#endif
+ __ADD(__db_reg_env);
+ __ADD(__db_region);
+ __ADD(__rep);
+ __ADD(__db_txn_stat_int);
+ __ADD(__db_txnregion);
+
+#ifndef HAVE_MIXED_SIZE_ADDRESSING
+ __ADD(__db_dbt);
+ __ADD(__db_lockreq);
+ __ADD(__db_log_cursor);
+ __ADD(__log_rec_spec);
+ __ADD(__db_mpoolfile);
+ __ADD(__db_mpool_fstat);
+ __ADD(__db_txn);
+ __ADD(__kids);
+ __ADD(__my_cursors);
+ __ADD(__femfs);
+ __ADD(__db_preplist);
+ __ADD(__db_txn_active);
+ __ADD(__db_txn_stat);
+ __ADD(__db_txn_token);
+ __ADD(__db_repmgr_site);
+ __ADD(__db_repmgr_conn_err);
+ __ADD(__db_seq_record);
+ __ADD(__db_sequence);
+ __ADD(__db);
+ __ADD(__cq_fq);
+ __ADD(__cq_aq);
+ __ADD(__cq_jq);
+ __ADD(__db_heap_rid);
+ __ADD(__dbc);
+ __ADD(__key_range);
+ __ADD(__db_compact);
+ __ADD(__db_env);
+ __ADD(__db_distab);
+ __ADD(__db_logvrfy_config);
+ __ADD(__db_channel);
+ __ADD(__db_site);
+ __ADD(__fn);
+ __ADD(__db_msgbuf);
+ __ADD(__pin_list);
+ __ADD(__env_thread_info);
+ __ADD(__flag_map);
+ __ADD(__db_backup_handle);
+ __ADD(__env);
+ __ADD(__dbc_internal);
+ __ADD(__dbpginfo);
+ __ADD(__epg);
+ __ADD(__cursor);
+ __ADD(__btree);
+ __ADD(__db_cipher);
+ __ADD(__db_foreign_info);
+ __ADD(__db_txnhead);
+ __ADD(__db_txnlist);
+ __ADD(__join_cursor);
+ __ADD(__pg_chksum);
+ __ADD(__pg_crypto);
+ __ADD(__heaphdr);
+ __ADD(__heaphdrsplt);
+ __ADD(__pglist);
+ __ADD(__vrfy_dbinfo);
+ __ADD(__vrfy_pageinfo);
+ __ADD(__vrfy_childinfo);
+ __ADD(__db_globals);
+ __ADD(__envq);
+ __ADD(__heap);
+ __ADD(__heap_cursor);
+ __ADD(__db_locktab);
+ __ADD(__db_entry);
+ __ADD(__fname);
+ __ADD(__db_log);
+ __ADD(__hdr);
+ __ADD(__log_persist);
+ __ADD(__db_commit);
+ __ADD(__db_filestart);
+ __ADD(__log_rec_hdr);
+ __ADD(__db_log_verify_info);
+ __ADD(__txn_verify_info);
+ __ADD(__lv_filereg_info);
+ __ADD(__lv_filelife);
+ __ADD(__lv_ckp_info);
+ __ADD(__lv_timestamp_info);
+ __ADD(__lv_txnrange);
+ __ADD(__add_recycle_params);
+ __ADD(__ckp_verify_params);
+ __ADD(__db_mpool);
+ __ADD(__db_mpreg);
+ __ADD(__db_mpool_hash);
+ __ADD(__bh_frozen_p);
+ __ADD(__bh_frozen_a);
+#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__db_mutexmgr);
+#endif
+ __ADD(__fh_t);
+ __ADD(__db_partition);
+ __ADD(__part_internal);
+ __ADD(__qcursor);
+ __ADD(__mpfarray);
+ __ADD(__qmpf);
+ __ADD(__queue);
+ __ADD(__qam_filelist);
+ __ADD(__db_reg_env_ref);
+ __ADD(__db_region_mem_t);
+ __ADD(__db_reginfo_t);
+ __ADD(__rep_waiter);
+ __ADD(__db_rep);
+ __ADD(__rep_lease_entry);
+ __ADD(__txn_detail);
+ __ADD(__db_txnmgr);
+ __ADD(__db_commit_info);
+ __ADD(__txn_logrec);
+#endif
+
+ return (__ham_func5(NULL, t, i * sizeof(t[0])));
+}
diff --git a/src/env/env_stat.c b/src/env/env_stat.c
new file mode 100644
index 00000000..9bc3fe7e
--- /dev/null
+++ b/src/env/env_stat.c
@@ -0,0 +1,879 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+
+#ifdef HAVE_STATISTICS
+static int __env_print_all __P((ENV *, u_int32_t));
+static int __env_print_dbenv_all __P((ENV *, u_int32_t));
+static int __env_print_env_all __P((ENV *, u_int32_t));
+static int __env_print_fh __P((ENV *));
+static int __env_print_stats __P((ENV *, u_int32_t));
+static int __env_print_thread __P((ENV *));
+static int __env_stat_print __P((ENV *, u_int32_t));
+static char *__env_thread_state_print __P((DB_THREAD_STATE));
+static const char *
+ __reg_type __P((reg_type_t));
+
+/*
+ * __env_stat_print_pp --
+ * ENV->stat_print pre/post processor.
+ *
+ * PUBLIC: int __env_stat_print_pp __P((DB_ENV *, u_int32_t));
+ */
+int
+__env_stat_print_pp(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ DB_THREAD_INFO *ip;
+ ENV *env;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->stat_print");
+
+ if ((ret = __db_fchk(env, "DB_ENV->stat_print",
+ flags, DB_STAT_ALL | DB_STAT_ALLOC |
+ DB_STAT_CLEAR | DB_STAT_SUBSYSTEM)) != 0)
+ return (ret);
+
+ ENV_ENTER(env, ip);
+ REPLICATION_WRAP(env, (__env_stat_print(env, flags)), 0, ret);
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+
+/*
+ * __env_stat_print --
+ * ENV->stat_print method.
+ */
+static int
+__env_stat_print(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ time_t now;
+ int ret;
+ char time_buf[CTIME_BUFLEN];
+
+ (void)time(&now);
+ __db_msg(env, "%.24s\tLocal time", __os_ctime(&now, time_buf));
+
+ if ((ret = __env_print_stats(env, flags)) != 0)
+ return (ret);
+
+ if (LF_ISSET(DB_STAT_ALL) &&
+ (ret = __env_print_all(env, flags)) != 0)
+ return (ret);
+
+ if ((ret = __env_print_thread(env)) != 0)
+ return (ret);
+
+ if ((ret = __env_print_fh(env)) != 0)
+ return (ret);
+
+ if (!LF_ISSET(DB_STAT_SUBSYSTEM))
+ return (0);
+
+ if (LOGGING_ON(env)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __log_stat_print(env, flags)) != 0)
+ return (ret);
+
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __dbreg_stat_print(env, flags)) != 0)
+ return (ret);
+ }
+
+ if (LOCKING_ON(env)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __lock_stat_print(env, flags)) != 0)
+ return (ret);
+ }
+
+ if (MPOOL_ON(env)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __memp_stat_print(env, flags)) != 0)
+ return (ret);
+ }
+
+ if (REP_ON(env)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __rep_stat_print(env, flags)) != 0)
+ return (ret);
+#ifdef HAVE_REPLICATION_THREADS
+ if ((ret = __repmgr_stat_print(env, flags)) != 0)
+ return (ret);
+#endif
+ }
+
+ if (TXN_ON(env)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __txn_stat_print(env, flags)) != 0)
+ return (ret);
+ }
+
+#ifdef HAVE_MUTEX_SUPPORT
+ /*
+ * Dump the mutexes last. If DB_STAT_CLEAR is set this will
+ * clear out the mutex counters and we want to see them in
+ * the context of the other subsystems first.
+ */
+ if (MUTEX_ON(env)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ if ((ret = __mutex_stat_print(env, flags)) != 0)
+ return (ret);
+ }
+#endif
+
+ return (0);
+}
+
+/*
+ * __env_print_stats --
+ * Display the default environment statistics.
+ *
+ */
+static int
+__env_print_stats(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ REGENV *renv;
+ REGINFO *infop;
+ char time_buf[CTIME_BUFLEN];
+
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ if (LF_ISSET(DB_STAT_ALL)) {
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ __db_msg(env, "Default database environment information:");
+ }
+ STAT_HEX("Magic number", renv->magic);
+ STAT_LONG("Panic value", renv->panic);
+ __db_msg(env, "%d.%d.%d\tEnvironment version",
+ renv->majver, renv->minver, renv->patchver);
+ STAT_LONG("Btree version", DB_BTREEVERSION);
+ STAT_LONG("Hash version", DB_HASHVERSION);
+ STAT_LONG("Lock version", DB_LOCKVERSION);
+ STAT_LONG("Log version", DB_LOGVERSION);
+ STAT_LONG("Queue version", DB_QAMVERSION);
+ STAT_LONG("Sequence version", DB_SEQUENCE_VERSION);
+ STAT_LONG("Txn version", DB_TXNVERSION);
+ __db_msg(env,
+ "%.24s\tCreation time", __os_ctime(&renv->timestamp, time_buf));
+ STAT_HEX("Environment ID", renv->envid);
+ __mutex_print_debug_single(env,
+ "Primary region allocation and reference count mutex",
+ renv->mtx_regenv, flags);
+ STAT_LONG("References", renv->refcnt);
+ __db_dlbytes(env, "Current region size",
+ (u_long)0, (u_long)0, (u_long)infop->rp->size);
+ __db_dlbytes(env, "Maximum region size",
+ (u_long)0, (u_long)0, (u_long)infop->rp->max);
+
+ return (0);
+}
+
+/*
+ * __env_print_all --
+ * Display the debugging environment statistics.
+ */
+static int
+__env_print_all(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ int ret, t_ret;
+
+ /*
+ * There are two structures -- DB_ENV and ENV.
+ */
+ ret = __env_print_dbenv_all(env, flags);
+ if ((t_ret = __env_print_env_all(env, flags)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __env_print_dbenv_all --
+ * Display the debugging environment statistics.
+ */
+static int
+__env_print_dbenv_all(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ static const FN db_env_fn[] = {
+ { DB_ENV_AUTO_COMMIT, "DB_ENV_AUTO_COMMIT" },
+ { DB_ENV_CDB_ALLDB, "DB_ENV_CDB_ALLDB" },
+ { DB_ENV_DIRECT_DB, "DB_ENV_DIRECT_DB" },
+ { DB_ENV_DSYNC_DB, "DB_ENV_DSYNC_DB" },
+ { DB_ENV_MULTIVERSION, "DB_ENV_MULTIVERSION" },
+ { DB_ENV_NOLOCKING, "DB_ENV_NOLOCKING" },
+ { DB_ENV_NOMMAP, "DB_ENV_NOMMAP" },
+ { DB_ENV_NOPANIC, "DB_ENV_NOPANIC" },
+ { DB_ENV_OVERWRITE, "DB_ENV_OVERWRITE" },
+ { DB_ENV_REGION_INIT, "DB_ENV_REGION_INIT" },
+ { DB_ENV_TIME_NOTGRANTED, "DB_ENV_TIME_NOTGRANTED" },
+ { DB_ENV_TXN_NOSYNC, "DB_ENV_TXN_NOSYNC" },
+ { DB_ENV_TXN_NOWAIT, "DB_ENV_TXN_NOWAIT" },
+ { DB_ENV_TXN_SNAPSHOT, "DB_ENV_TXN_SNAPSHOT" },
+ { DB_ENV_TXN_WRITE_NOSYNC, "DB_ENV_TXN_WRITE_NOSYNC" },
+ { DB_ENV_YIELDCPU, "DB_ENV_YIELDCPU" },
+ { 0, NULL }
+ };
+ static const FN vfn[] = {
+ { DB_VERB_DEADLOCK, "DB_VERB_DEADLOCK" },
+ { DB_VERB_FILEOPS, "DB_VERB_FILEOPS" },
+ { DB_VERB_FILEOPS_ALL, "DB_VERB_FILEOPS_ALL" },
+ { DB_VERB_RECOVERY, "DB_VERB_RECOVERY" },
+ { DB_VERB_REGISTER, "DB_VERB_REGISTER" },
+ { DB_VERB_REPLICATION, "DB_VERB_REPLICATION" },
+ { DB_VERB_REP_ELECT, "DB_VERB_REP_ELECT" },
+ { DB_VERB_REP_LEASE, "DB_VERB_REP_LEASE" },
+ { DB_VERB_REP_MISC, "DB_VERB_REP_MISC" },
+ { DB_VERB_REP_MSGS, "DB_VERB_REP_MSGS" },
+ { DB_VERB_REP_SYNC, "DB_VERB_REP_SYNC" },
+ { DB_VERB_REP_SYSTEM, "DB_VERB_REP_SYSTEM" },
+ { DB_VERB_REP_TEST, "DB_VERB_REP_TEST" },
+ { DB_VERB_REPMGR_CONNFAIL, "DB_VERB_REPMGR_CONNFAIL" },
+ { DB_VERB_REPMGR_MISC, "DB_VERB_REPMGR_MISC" },
+ { DB_VERB_WAITSFOR, "DB_VERB_WAITSFOR" },
+ { 0, NULL }
+ };
+ DB_ENV *dbenv;
+ DB_MSGBUF mb;
+ char **p;
+
+ dbenv = env->dbenv;
+ DB_MSGBUF_INIT(&mb);
+
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ STAT_POINTER("ENV", dbenv->env);
+ __mutex_print_debug_single(
+ env, "DB_ENV handle mutex", dbenv->mtx_db_env, flags);
+ STAT_ISSET("Errcall", dbenv->db_errcall);
+ STAT_ISSET("Errfile", dbenv->db_errfile);
+ STAT_STRING("Errpfx", dbenv->db_errpfx);
+ STAT_ISSET("Msgfile", dbenv->db_msgfile);
+ STAT_ISSET("Msgcall", dbenv->db_msgcall);
+
+ STAT_ISSET("AppDispatch", dbenv->app_dispatch);
+ STAT_ISSET("Event", dbenv->db_event_func);
+ STAT_ISSET("Feedback", dbenv->db_feedback);
+ STAT_ISSET("Free", dbenv->db_free);
+ STAT_ISSET("Panic", dbenv->db_paniccall);
+ STAT_ISSET("Malloc", dbenv->db_malloc);
+ STAT_ISSET("Realloc", dbenv->db_realloc);
+ STAT_ISSET("IsAlive", dbenv->is_alive);
+ STAT_ISSET("ThreadId", dbenv->thread_id);
+ STAT_ISSET("ThreadIdString", dbenv->thread_id_string);
+
+ STAT_STRING("Log dir", dbenv->db_log_dir);
+ STAT_STRING("Metadata dir", dbenv->db_md_dir);
+ STAT_STRING("Tmp dir", dbenv->db_tmp_dir);
+ if (dbenv->db_data_dir == NULL)
+ STAT_ISSET("Data dir", dbenv->db_data_dir);
+ else {
+ for (p = dbenv->db_data_dir; *p != NULL; ++p)
+ __db_msgadd(env, &mb, "%s\tData dir", *p);
+ DB_MSGBUF_FLUSH(env, &mb);
+ }
+
+ STAT_STRING(
+ "Intermediate directory mode", dbenv->intermediate_dir_mode);
+
+ STAT_LONG("Shared memory key", dbenv->shm_key);
+
+ STAT_ISSET("Password", dbenv->passwd);
+
+ STAT_ISSET("App private", dbenv->app_private);
+ STAT_ISSET("Api1 internal", dbenv->api1_internal);
+ STAT_ISSET("Api2 internal", dbenv->api2_internal);
+
+ __db_prflags(env, NULL, dbenv->verbose, vfn, NULL, "\tVerbose flags");
+
+ STAT_ULONG("Mutex align", dbenv->mutex_align);
+ STAT_ULONG("Mutex cnt", dbenv->mutex_cnt);
+ STAT_ULONG("Mutex inc", dbenv->mutex_inc);
+ STAT_ULONG("Mutex tas spins", dbenv->mutex_tas_spins);
+
+ STAT_ISSET("Lock conflicts", dbenv->lk_conflicts);
+ STAT_LONG("Lock modes", dbenv->lk_modes);
+ STAT_ULONG("Lock detect", dbenv->lk_detect);
+ STAT_ULONG("Lock init", dbenv->lk_init);
+ STAT_ULONG("Lock init lockers", dbenv->lk_init_lockers);
+ STAT_ULONG("Lock init objects", dbenv->lk_init_objects);
+ STAT_ULONG("Lock max", dbenv->lk_max);
+ STAT_ULONG("Lock max lockers", dbenv->lk_max_lockers);
+ STAT_ULONG("Lock max objects", dbenv->lk_max_objects);
+ STAT_ULONG("Lock partitions", dbenv->lk_partitions);
+ STAT_ULONG("Lock object hash table size", dbenv->object_t_size);
+ STAT_ULONG("Lock timeout", dbenv->lk_timeout);
+
+ STAT_ULONG("Log bsize", dbenv->lg_bsize);
+ STAT_FMT("Log file mode", "%#o", int, dbenv->lg_filemode);
+ STAT_ULONG("Log region max", dbenv->lg_regionmax);
+ STAT_ULONG("Log size", dbenv->lg_size);
+
+ STAT_ULONG("Cache GB", dbenv->mp_gbytes);
+ STAT_ULONG("Cache B", dbenv->mp_bytes);
+ STAT_ULONG("Cache max GB", dbenv->mp_max_gbytes);
+ STAT_ULONG("Cache max B", dbenv->mp_max_bytes);
+ STAT_ULONG("Cache mmap size", dbenv->mp_mmapsize);
+ STAT_ULONG("Cache max open fd", dbenv->mp_maxopenfd);
+ STAT_ULONG("Cache max write", dbenv->mp_maxwrite);
+ STAT_ULONG("Cache number", dbenv->mp_ncache);
+ STAT_ULONG("Cache max write sleep", dbenv->mp_maxwrite_sleep);
+
+ STAT_ULONG("Txn init", dbenv->tx_init);
+ STAT_ULONG("Txn max", dbenv->tx_max);
+ STAT_ULONG("Txn timestamp", dbenv->tx_timestamp);
+ STAT_ULONG("Txn timeout", dbenv->tx_timeout);
+
+ STAT_ULONG("Thread count", dbenv->thr_max);
+
+ STAT_ISSET("Registry", dbenv->registry);
+ STAT_ULONG("Registry offset", dbenv->registry_off);
+ STAT_ULONG("Registry timeout", dbenv->envreg_timeout);
+
+ __db_prflags(env,
+ NULL, dbenv->flags, db_env_fn, NULL, "\tPublic environment flags");
+
+ return (0);
+}
+
+/*
+ * __env_print_env_all --
+ * Display the debugging environment statistics.
+ */
+static int
+__env_print_env_all(env, flags)
+ ENV *env;
+ u_int32_t flags;
+{
+ static const FN env_fn[] = {
+ { ENV_CDB, "ENV_CDB" },
+ { ENV_DBLOCAL, "ENV_DBLOCAL" },
+ { ENV_LOCKDOWN, "ENV_LOCKDOWN" },
+ { ENV_NO_OUTPUT_SET, "ENV_NO_OUTPUT_SET" },
+ { ENV_OPEN_CALLED, "ENV_OPEN_CALLED" },
+ { ENV_PRIVATE, "ENV_PRIVATE" },
+ { ENV_RECOVER_FATAL, "ENV_RECOVER_FATAL" },
+ { ENV_REF_COUNTED, "ENV_REF_COUNTED" },
+ { ENV_SYSTEM_MEM, "ENV_SYSTEM_MEM" },
+ { ENV_THREAD, "ENV_THREAD" },
+ { 0, NULL }
+ };
+ static const FN ofn[] = {
+ { DB_CREATE, "DB_CREATE" },
+ { DB_FORCE, "DB_FORCE" },
+ { DB_INIT_CDB, "DB_INIT_CDB" },
+ { DB_INIT_LOCK, "DB_INIT_LOCK" },
+ { DB_INIT_LOG, "DB_INIT_LOG" },
+ { DB_INIT_MPOOL, "DB_INIT_MPOOL" },
+ { DB_INIT_REP, "DB_INIT_REP" },
+ { DB_INIT_TXN, "DB_INIT_TXN" },
+ { DB_LOCKDOWN, "DB_LOCKDOWN" },
+ { DB_NOMMAP, "DB_NOMMAP" },
+ { DB_PRIVATE, "DB_PRIVATE" },
+ { DB_RDONLY, "DB_RDONLY" },
+ { DB_RECOVER, "DB_RECOVER" },
+ { DB_RECOVER_FATAL, "DB_RECOVER_FATAL" },
+ { DB_SYSTEM_MEM, "DB_SYSTEM_MEM" },
+ { DB_THREAD, "DB_THREAD" },
+ { DB_TRUNCATE, "DB_TRUNCATE" },
+ { DB_TXN_NOSYNC, "DB_TXN_NOSYNC" },
+ { DB_USE_ENVIRON, "DB_USE_ENVIRON" },
+ { DB_USE_ENVIRON_ROOT, "DB_USE_ENVIRON_ROOT" },
+ { 0, NULL }
+ };
+ static const FN regenvfn[] = {
+ { DB_REGENV_REPLOCKED, "DB_REGENV_REPLOCKED" },
+ { 0, NULL }
+ };
+ REGENV *renv;
+ REGINFO *infop;
+ REGION *rp;
+ u_int32_t i;
+ char time_buf[CTIME_BUFLEN];
+
+ infop = env->reginfo;
+ renv = infop->primary;
+
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ STAT_POINTER("DB_ENV", env->dbenv);
+ __mutex_print_debug_single(
+ env, "ENV handle mutex", env->mtx_env, flags);
+
+ STAT_STRING("Home", env->db_home);
+ __db_prflags(env, NULL, env->open_flags, ofn, NULL, "\tOpen flags");
+ STAT_FMT("Mode", "%#o", int, env->db_mode);
+
+ STAT_ULONG("Pid cache", env->pid_cache);
+
+ STAT_ISSET("Lockfhp", env->lockfhp);
+
+ STAT_ISSET("Locker", env->env_lref);
+
+ STAT_ISSET("Internal recovery table", env->recover_dtab.int_dispatch);
+ STAT_ULONG("Number of recovery table slots",
+ env->recover_dtab.int_size);
+ STAT_ISSET("External recovery table", env->recover_dtab.ext_dispatch);
+ STAT_ULONG("Number of recovery table slots",
+ env->recover_dtab.ext_size);
+
+ STAT_ULONG("Thread hash buckets", env->thr_nbucket);
+ STAT_ISSET("Thread hash table", env->thr_hashtab);
+
+ __mutex_print_debug_single(
+ env, "ENV list of DB handles mutex", env->mtx_dblist, flags);
+ STAT_LONG("DB reference count", env->db_ref);
+
+ __mutex_print_debug_single(env, "MT mutex", env->mtx_mt, flags);
+
+ STAT_ISSET("Crypto handle", env->crypto_handle);
+ STAT_ISSET("Lock handle", env->lk_handle);
+ STAT_ISSET("Log handle", env->lg_handle);
+ STAT_ISSET("Cache handle", env->mp_handle);
+ STAT_ISSET("Mutex handle", env->mutex_handle);
+ STAT_ISSET("Replication handle", env->rep_handle);
+ STAT_ISSET("Txn handle", env->tx_handle);
+
+ STAT_ISSET("User copy", env->dbt_usercopy);
+
+ STAT_LONG("Test abort", env->test_abort);
+ STAT_LONG("Test check", env->test_check);
+ STAT_LONG("Test copy", env->test_copy);
+
+ __db_prflags(env,
+ NULL, env->flags, env_fn, NULL, "\tPrivate environment flags");
+
+ __db_print_reginfo(env, infop, "Primary", flags);
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ __db_msg(env, "Per region database environment information:");
+ for (rp = R_ADDR(infop, renv->region_off),
+ i = 0; i < renv->region_cnt; ++i, ++rp) {
+ if (rp->id == INVALID_REGION_ID)
+ continue;
+ __db_msg(env, "%s Region:", __reg_type(rp->type));
+ STAT_LONG("Region ID", rp->id);
+ STAT_LONG("Segment ID", rp->segid);
+ __db_dlbytes(env,
+ "Size", (u_long)0, (u_long)0, (u_long)rp->size);
+ }
+ __db_prflags(env,
+ NULL, renv->init_flags, ofn, NULL, "\tInitialization flags");
+ STAT_ULONG("Region slots", renv->region_cnt);
+ __db_prflags(env,
+ NULL, renv->flags, regenvfn, NULL, "\tReplication flags");
+ __db_msg(env, "%.24s\tOperation timestamp",
+ renv->op_timestamp == 0 ?
+ "!Set" : __os_ctime(&renv->op_timestamp, time_buf));
+ __db_msg(env, "%.24s\tReplication timestamp",
+ renv->rep_timestamp == 0 ?
+ "!Set" : __os_ctime(&renv->rep_timestamp, time_buf));
+
+ return (0);
+}
+
+static char *
+__env_thread_state_print(state)
+ DB_THREAD_STATE state;
+{
+ switch (state) {
+ case THREAD_ACTIVE:
+ return ("active");
+ case THREAD_BLOCKED:
+ return ("blocked");
+ case THREAD_BLOCKED_DEAD:
+ return ("blocked and dead");
+ case THREAD_OUT:
+ return ("out");
+ default:
+ return ("unknown");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * __env_print_thread --
+ * Display the thread block state.
+ */
+static int
+__env_print_thread(env)
+ ENV *env;
+{
+ BH *bhp;
+ DB_ENV *dbenv;
+ DB_HASHTAB *htab;
+ DB_MPOOL *dbmp;
+ DB_THREAD_INFO *ip;
+ PIN_LIST *list, *lp;
+ REGENV *renv;
+ REGINFO *infop;
+ THREAD_INFO *thread;
+ u_int32_t i;
+ char buf[DB_THREADID_STRLEN];
+
+ dbenv = env->dbenv;
+
+ /* The thread table may not be configured. */
+ if ((htab = env->thr_hashtab) == NULL)
+ return (0);
+
+ dbmp = env->mp_handle;
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ __db_msg(env, "Thread tracking information");
+
+ /* Dump out the info we have on thread tracking. */
+ infop = env->reginfo;
+ renv = infop->primary;
+ thread = R_ADDR(infop, renv->thread_off);
+ STAT_ULONG("Thread blocks allocated", thread->thr_count);
+ STAT_ULONG("Thread allocation threshold", thread->thr_max);
+ STAT_ULONG("Thread hash buckets", thread->thr_nbucket);
+
+ /* Dump out the info we have on active threads. */
+ __db_msg(env, "Thread status blocks:");
+ for (i = 0; i < env->thr_nbucket; i++)
+ SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+ if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE)
+ continue;
+ __db_msg(env, "\tprocess/thread %s: %s",
+ dbenv->thread_id_string(
+ dbenv, ip->dbth_pid, ip->dbth_tid, buf),
+ __env_thread_state_print(ip->dbth_state));
+ list = R_ADDR(env->reginfo, ip->dbth_pinlist);
+ for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) {
+ if (lp->b_ref == INVALID_ROFF)
+ continue;
+ bhp = R_ADDR(
+ &dbmp->reginfo[lp->region], lp->b_ref);
+ __db_msg(env,
+ "\t\tpins: %lu", (u_long)bhp->pgno);
+ }
+ }
+ return (0);
+}
+
+/*
+ * __env_print_fh --
+ * Display statistics for all handles open in this environment.
+ */
+static int
+__env_print_fh(env)
+ ENV *env;
+{
+ DB_FH *fhp;
+
+ if (TAILQ_FIRST(&env->fdlist) == NULL)
+ return (0);
+
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ __db_msg(env, "Environment file handle information");
+
+ MUTEX_LOCK(env, env->mtx_env);
+
+ TAILQ_FOREACH(fhp, &env->fdlist, q)
+ __db_print_fh(env, NULL, fhp, 0);
+
+ MUTEX_UNLOCK(env, env->mtx_env);
+
+ return (0);
+}
+
+/*
+ * __db_print_fh --
+ * Print out a file handle.
+ *
+ * PUBLIC: void __db_print_fh __P((ENV *, const char *, DB_FH *, u_int32_t));
+ */
+void
+__db_print_fh(env, tag, fh, flags)
+ ENV *env;
+ const char *tag;
+ DB_FH *fh;
+ u_int32_t flags;
+{
+ static const FN fn[] = {
+ { DB_FH_NOSYNC, "DB_FH_NOSYNC" },
+ { DB_FH_OPENED, "DB_FH_OPENED" },
+ { DB_FH_UNLINK, "DB_FH_UNLINK" },
+ { 0, NULL }
+ };
+
+ if (fh == NULL) {
+ STAT_ISSET(tag, fh);
+ return;
+ }
+
+ STAT_STRING("file-handle.file name", fh->name);
+
+ __mutex_print_debug_single(
+ env, "file-handle.mutex", fh->mtx_fh, flags);
+
+ STAT_LONG("file-handle.reference count", fh->ref);
+ STAT_LONG("file-handle.file descriptor", fh->fd);
+
+ STAT_ULONG("file-handle.page number", fh->pgno);
+ STAT_ULONG("file-handle.page size", fh->pgsize);
+ STAT_ULONG("file-handle.page offset", fh->offset);
+
+ STAT_ULONG("file-handle.seek count", fh->seek_count);
+ STAT_ULONG("file-handle.read count", fh->read_count);
+ STAT_ULONG("file-handle.write count", fh->write_count);
+
+ __db_prflags(env, NULL, fh->flags, fn, NULL, "\tfile-handle.flags");
+}
+
+/*
+ * __db_print_fileid --
+ * Print out a file ID.
+ *
+ * PUBLIC: void __db_print_fileid __P((ENV *, u_int8_t *, const char *));
+ */
+void
+__db_print_fileid(env, id, suffix)
+ ENV *env;
+ u_int8_t *id;
+ const char *suffix;
+{
+ DB_MSGBUF mb;
+ int i;
+
+ if (id == NULL) {
+ STAT_ISSET("ID", id);
+ return;
+ }
+
+ DB_MSGBUF_INIT(&mb);
+ for (i = 0; i < DB_FILE_ID_LEN; ++i, ++id) {
+ __db_msgadd(env, &mb, "%x", (u_int)*id);
+ if (i < DB_FILE_ID_LEN - 1)
+ __db_msgadd(env, &mb, " ");
+ }
+ if (suffix != NULL)
+ __db_msgadd(env, &mb, "%s", suffix);
+ DB_MSGBUF_FLUSH(env, &mb);
+}
+
+/*
+ * __db_dl --
+ * Display a big value.
+ *
+ * PUBLIC: void __db_dl __P((ENV *, const char *, u_long));
+ */
+void
+__db_dl(env, msg, value)
+ ENV *env;
+ const char *msg;
+ u_long value;
+{
+ /*
+ * Two formats: if less than 10 million, display the number itself; if
+ * 10 million or greater, display it as ###M.
+ */
+ if (value < 10000000)
+ __db_msg(env, "%lu\t%s", value, msg);
+ else
+ __db_msg(env, "%luM\t%s (%lu)", value / 1000000, msg, value);
+}
+
+/*
+ * __db_dl_pct --
+ * Display a big value, and related percentage.
+ *
+ * PUBLIC: void __db_dl_pct
+ * PUBLIC: __P((ENV *, const char *, u_long, int, const char *));
+ */
+void
+__db_dl_pct(env, msg, value, pct, tag)
+ ENV *env;
+ const char *msg, *tag;
+ u_long value;
+ int pct;
+{
+ DB_MSGBUF mb;
+
+ DB_MSGBUF_INIT(&mb);
+
+ /*
+ * Two formats: if less than 10 million, display the number itself; if
+ * 10 million or greater, round it off and display it as ###M.
+ */
+ if (value < 10000000)
+ __db_msgadd(env, &mb, "%lu\t%s", value, msg);
+ else
+ __db_msgadd(env,
+ &mb, "%luM\t%s", (value + 500000) / 1000000, msg);
+ if (tag == NULL)
+ __db_msgadd(env, &mb, " (%d%%)", pct);
+ else
+ __db_msgadd(env, &mb, " (%d%% %s)", pct, tag);
+
+ DB_MSGBUF_FLUSH(env, &mb);
+}
+
+/*
+ * __db_dlbytes --
+ * Display a big number of bytes.
+ *
+ * PUBLIC: void __db_dlbytes
+ * PUBLIC: __P((ENV *, const char *, u_long, u_long, u_long));
+ */
+void
+__db_dlbytes(env, msg, gbytes, mbytes, bytes)
+ ENV *env;
+ const char *msg;
+ u_long gbytes, mbytes, bytes;
+{
+ DB_MSGBUF mb;
+ const char *sep;
+
+ DB_MSGBUF_INIT(&mb);
+
+ /* Normalize the values. */
+ while (bytes >= MEGABYTE) {
+ ++mbytes;
+ bytes -= MEGABYTE;
+ }
+ while (mbytes >= GIGABYTE / MEGABYTE) {
+ ++gbytes;
+ mbytes -= GIGABYTE / MEGABYTE;
+ }
+
+ if (gbytes == 0 && mbytes == 0 && bytes == 0)
+ __db_msgadd(env, &mb, "0");
+ else {
+ sep = "";
+ if (gbytes > 0) {
+ __db_msgadd(env, &mb, "%luGB", gbytes);
+ sep = " ";
+ }
+ if (mbytes > 0) {
+ __db_msgadd(env, &mb, "%s%luMB", sep, mbytes);
+ sep = " ";
+ }
+ if (bytes >= 1024) {
+ __db_msgadd(env, &mb, "%s%luKB", sep, bytes / 1024);
+ bytes %= 1024;
+ sep = " ";
+ }
+ if (bytes > 0)
+ __db_msgadd(env, &mb, "%s%luB", sep, bytes);
+ }
+
+ __db_msgadd(env, &mb, "\t%s", msg);
+
+ DB_MSGBUF_FLUSH(env, &mb);
+}
+
+/*
+ * __db_print_reginfo --
+ * Print out underlying shared region information.
+ *
+ * PUBLIC: void __db_print_reginfo
+ * PUBLIC: __P((ENV *, REGINFO *, const char *, u_int32_t));
+ */
+void
+__db_print_reginfo(env, infop, s, flags)
+ ENV *env;
+ REGINFO *infop;
+ const char *s;
+ u_int32_t flags;
+{
+ static const FN fn[] = {
+ { REGION_CREATE, "REGION_CREATE" },
+ { REGION_CREATE_OK, "REGION_CREATE_OK" },
+ { REGION_JOIN_OK, "REGION_JOIN_OK" },
+ { REGION_SHARED, "REGION_SHARED" },
+ { 0, NULL }
+ };
+
+ __db_msg(env, "%s", DB_GLOBAL(db_line));
+ __db_msg(env, "%s REGINFO information:", s);
+ STAT_STRING("Region type", __reg_type(infop->type));
+ STAT_ULONG("Region ID", infop->id);
+ STAT_STRING("Region name", infop->name);
+ STAT_POINTER("Region address", infop->addr);
+ STAT_POINTER("Region allocation head", infop->head);
+ STAT_POINTER("Region primary address", infop->primary);
+ STAT_ULONG("Region maximum allocation", infop->max_alloc);
+ STAT_ULONG("Region allocated", infop->allocated);
+ __env_alloc_print(infop, flags);
+
+ __db_prflags(env, NULL, infop->flags, fn, NULL, "\tRegion flags");
+}
+
+/*
+ * __reg_type --
+ * Return the region type string.
+ */
+static const char *
+__reg_type(t)
+ reg_type_t t;
+{
+ switch (t) {
+ case REGION_TYPE_ENV:
+ return ("Environment");
+ case REGION_TYPE_LOCK:
+ return ("Lock");
+ case REGION_TYPE_LOG:
+ return ("Log");
+ case REGION_TYPE_MPOOL:
+ return ("Mpool");
+ case REGION_TYPE_MUTEX:
+ return ("Mutex");
+ case REGION_TYPE_TXN:
+ return ("Transaction");
+ case INVALID_REGION_TYPE:
+ return ("Invalid");
+ }
+ return ("Unknown");
+}
+
+#else /* !HAVE_STATISTICS */
+
+/*
+ * __db_stat_not_built --
+ * Common error routine when library not built with statistics.
+ *
+ * PUBLIC: int __db_stat_not_built __P((ENV *));
+ */
+int
+__db_stat_not_built(env)
+ ENV *env;
+{
+ __db_errx(env, DB_STR("1554",
+ "Library build did not include statistics support"));
+ return (DB_OPNOTSUP);
+}
+
+int
+__env_stat_print_pp(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ COMPQUIET(flags, 0);
+
+ return (__db_stat_not_built(dbenv->env));
+}
+#endif