summaryrefslogtreecommitdiff
path: root/src/env/env_register.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/env/env_register.c')
-rw-r--r--src/env/env_register.c730
1 files changed, 730 insertions, 0 deletions
diff --git a/src/env/env_register.c b/src/env/env_register.c
new file mode 100644
index 00000000..7475444d
--- /dev/null
+++ b/src/env/env_register.c
@@ -0,0 +1,730 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2004, 2012 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+#define	REGISTER_FILE	"__db.register"
+
+/*
+ * Every registry slot is exactly PID_LEN bytes.  PID_FMT produces a 24-column
+ * right-justified decimal PID followed by a newline, and PID_EMPTY is both
+ * compared (PID_ISEMPTY) and written using PID_LEN, so the literal must be
+ * padded to the same 25 bytes: 'X', 22 blanks, '0', newline.
+ */
+#define	PID_EMPTY	"X                      0\n"	/* Unused PID entry */
+#define	PID_FMT		"%24lu\n"		/* PID entry format */
+						/* Unused PID test */
+#define	PID_ISEMPTY(p)	(memcmp(p, PID_EMPTY, PID_LEN) == 0)
+#define	PID_LEN		(25)			/* PID entry length */
+
+/*
+ * Registry file locking helpers.  Each one calls __os_fdlock on the
+ * environment's registry file handle at byte offset `pos'; REGISTRY_LOCK
+ * passes 1 and REGISTRY_UNLOCK passes 0 as the fourth argument, and `nowait'
+ * is forwarded unchanged on the lock path.
+ */
+#define REGISTRY_LOCK(env, pos, nowait) \
+ __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 1, nowait)
+#define REGISTRY_UNLOCK(env, pos) \
+ __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 0, 0)
+/*
+ * The file-wide "exclusive" lock is the lock at byte offset 1; per-process
+ * slot locks elsewhere in this file are taken at multiples of PID_LEN.
+ */
+#define REGISTRY_EXCL_LOCK(env, nowait) \
+ REGISTRY_LOCK(env, 1, nowait)
+#define REGISTRY_EXCL_UNLOCK(env) \
+ REGISTRY_UNLOCK(env, 1)
+
+/* Forward declarations of the file-local helpers defined below. */
+static int __envreg_add __P((ENV *, int *, u_int32_t));
+static int __envreg_pid_compare __P((const void *, const void *));
+static int __envreg_create_active_pid __P((ENV *, char *));
+
+/*
+ * Support for portable, multi-process database environment locking, based on
+ * the Subversion SR (#11511).
+ *
+ * The registry feature is configured by specifying the DB_REGISTER flag to the
+ * DbEnv.open method. If DB_REGISTER is specified, DB opens the registry file
+ * in the database environment home directory. The registry file is formatted
+ * as follows:
+ *
+ * 12345 # process ID slot 1
+ * X # empty slot
+ * 12346 # process ID slot 2
+ * X # empty slot
+ * 12347 # process ID slot 3
+ * 12348 # process ID slot 4
+ * X 12349 # empty slot
+ * X # empty slot
+ *
+ * All lines are fixed-length. All lines are process ID slots. Empty slots
+ * are marked with leading non-digit characters.
+ *
+ * To modify the file, you get an exclusive lock on the first byte of the file.
+ *
+ * While holding any DbEnv handle, each process has an exclusive lock on the
+ * first byte of a process ID slot. There is a restriction on having more
+ * than one DbEnv handle open at a time, because Berkeley DB uses per-process
+ * locking to implement this feature, that is, a process may never have more
+ * than a single slot locked.
+ *
+ * This work requires that if a process dies or the system crashes, locks held
+ * by the dying processes will be dropped. (We can't use system shared
+ * memory-backed or filesystem-backed locks because they're persistent when a
+ * process dies.) On POSIX systems, we use fcntl(2) locks; on Win32 we have
+ * LockFileEx/UnlockFile, except for Win/9X and Win/ME which have to loop on
+ * LockFile/UnlockFile.
+ *
+ * We could implement the same solution with flock locking instead of fcntl,
+ * but flock would require a separate file for each process of control (and
+ * probably each DbEnv handle) in the database environment, which is fairly
+ * ugly.
+ *
+ * Whenever a process opens a new DbEnv handle, it walks the registry file and
+ * verifies it CANNOT acquire the lock for any non-empty slot. If a lock for
+ * a non-empty slot is available, we know a process died holding an open handle,
+ * and recovery needs to be run.
+ *
+ * It's possible to get corruption in the registry file. If a write system
+ * call fails after partially completing, there can be corrupted entries in
+ * the registry file, or a partial entry at the end of the file. This is OK.
+ * A corrupted entry will be flagged as a non-empty line during the registry
+ * file walk. Since the line was corrupted by process failure, no process will
+ * hold a lock on the slot, which will lead to recovery being run.
+ *
+ * There can still be processes running in the environment when we recover it,
+ * and, in fact, there can still be processes running in the old environment
+ * after we're up and running in a new one. This is safe because performing
+ * recovery panics (and removes) the existing environment, so the window of
+ * vulnerability is small. Further, we check the panic flag (in the DB API
+ * methods, when waking from spinning on a mutex, and whenever we're about to
+ * write to disk). The only window of corruption is if the write check of the
+ * panic were to complete, the region subsequently be recovered, and then the
+ * write continues. That's very, very unlikely to happen. This vulnerability
+ * already exists in Berkeley DB, too, the registry code doesn't make it any
+ * worse than it already is.
+ *
+ * The only way to avoid that window entirely is to ensure that all processes
+ * in the Berkeley DB environment exit before we run recovery. Applications
+ * can do that if they maintain their own process registry outside of Berkeley
+ * DB, but it's a little more difficult to do here. The obvious approach is
+ * to send signals to any process using the database environment as soon as we
+ * decide to run recovery, but there are problems with that approach: we might
+ * not have permission to send signals to the process, the process might have
+ * signal handlers installed, the cookie stored might not be the same as kill's
+ * argument, we may not be able to reliably tell if the process died, and there
+ * are probably other problems. However, if we can send a signal, it reduces
+ * the window, and so we include the code here. To configure it, turn on the
+ * DB_ENVREG_KILL_ALL #define.
+ */
+/*
+ * When non-zero, __envreg_add compiles in a second pass over the registry
+ * that sends SIGKILL to every non-empty slot's process before recovery.
+ */
+#define DB_ENVREG_KILL_ALL 0
+
+/*
+ * __envreg_register --
+ *	Register a ENV handle.
+ *
+ *	Opens (creating if necessary) the registry file in the environment
+ *	home, takes the file's exclusive lock, and adds this process to the
+ *	registry.  On success *need_recoveryp is non-zero if the caller must
+ *	run recovery; in that case the exclusive lock is still held when we
+ *	return.  On failure *need_recoveryp is 0, the registry handle has been
+ *	closed and dbenv->registry is NULL.
+ *
+ * PUBLIC: int __envreg_register __P((ENV *, int *, u_int32_t));
+ */
+int
+__envreg_register(env, need_recoveryp, flags)
+	ENV *env;
+	int *need_recoveryp;
+	u_int32_t flags;
+{
+	DB_ENV *dbenv;
+	pid_t self;
+	u_int32_t size_bytes, size_mbytes;
+	int ret;
+	char *path;
+
+	dbenv = env->dbenv;
+	path = NULL;
+	*need_recoveryp = 0;
+
+	dbenv->thread_id(dbenv, &self, NULL);
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+		__db_msg(env, DB_STR_A("1524",
+		    "%lu: register environment", "%lu"), (u_long)self);
+
+	/* Construct the registry file's path, then open/create the file. */
+	ret = __db_appname(env, DB_APP_NONE, REGISTER_FILE, NULL, &path);
+	if (ret != 0)
+		goto err;
+	ret = __os_open(env,
+	    path, 0, DB_OSO_CREATE, DB_MODE_660, &dbenv->registry);
+	if (ret != 0)
+		goto err;
+
+	/*
+	 * Block until we own the exclusive lock.
+	 *
+	 * !!!
+	 * The locked byte may not exist yet in a freshly created file; that
+	 * is believed to be harmless.
+	 */
+	ret = REGISTRY_EXCL_LOCK(env, 0);
+	if (ret != 0)
+		goto err;
+
+	/*
+	 * A zero-length file means we just created it.  Ask for recovery in
+	 * that case: it lets an administrator clean up the system simply by
+	 * removing the registry file and restarting the application.
+	 */
+	ret = __os_ioinfo(env,
+	    path, dbenv->registry, &size_mbytes, &size_bytes, NULL);
+	if (ret != 0)
+		goto err;
+	if (size_mbytes == 0 && size_bytes == 0) {
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+			__db_msg(env, DB_STR_A("1525",
+			    "%lu: creating %s", "%lu %s"), (u_long)self, path);
+		*need_recoveryp = 1;
+	}
+
+	/* Put this process into the registry. */
+	ret = __envreg_add(env, need_recoveryp, flags);
+	if (ret != 0)
+		goto err;
+
+	/*
+	 * When no recovery is needed we are done with the exclusive lock.
+	 * Otherwise keep it: ENV->open calls back into the register code
+	 * after recovery has completed.
+	 */
+	if (*need_recoveryp == 0) {
+		ret = REGISTRY_EXCL_UNLOCK(env);
+		if (ret != 0)
+			goto err;
+	}
+	goto done;
+
+err:	*need_recoveryp = 0;
+
+	/*
+	 * !!!
+	 * We rely on closing the file handle to release every lock we hold.
+	 */
+	if (dbenv->registry != NULL)
+		(void)__os_closehandle(env, dbenv->registry);
+	dbenv->registry = NULL;
+
+done:	if (path != NULL)
+		__os_free(env, path);
+
+	return (ret);
+}
+
+/*
+ * __envreg_add --
+ *	Add the process' pid to the register.
+ *
+ *	The function works in three phases:
+ *	1. Read the registry from the current file position.  A non-empty
+ *	   slot that we are able to lock, or a partial trailing record, means
+ *	   recovery is required.  Finding our own PID already registered is
+ *	   an error (EINVAL).
+ *	2. If recovery is required and DB_FAILCHK or DB_FAILCHK_ISALIVE is
+ *	   set in flags, try failchk first; if that succeeds the dead slot is
+ *	   cleared and no recovery is requested.  Otherwise the environment
+ *	   is marked panicked (if we can attach), every slot is overwritten
+ *	   with PID_EMPTY, and DB_EVENT_REG_ALIVE is raised for each slot
+ *	   other than the dead one.
+ *	3. Write our PID into the first empty slot we can lock and remember
+ *	   its offset in dbenv->registry_off.
+ *
+ *	*need_recoveryp is set to 1 when recovery is still required; it is
+ *	never cleared here.  Returns 0 on success or an error value from the
+ *	underlying OS/environment calls.
+ */
+static int
+__envreg_add(env, need_recoveryp, flags)
+	ENV *env;
+	int *need_recoveryp;
+	u_int32_t flags;
+{
+	DB_ENV *dbenv;
+	DB_THREAD_INFO *ip;
+	REGENV * renv;
+	REGINFO *infop;
+	pid_t pid;
+	off_t end, pos, dead;
+	size_t nr, nw;
+	u_int lcnt;
+	u_int32_t bytes, mbytes, orig_flags;
+	int need_recovery, ret, t_ret;
+	char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10];
+
+	dbenv = env->dbenv;
+	need_recovery = 0;
+	COMPQUIET(dead, 0);
+	COMPQUIET(p, NULL);
+	ip = NULL;
+
+	/* Get a copy of our process ID, formatted as a registry slot. */
+	dbenv->thread_id(dbenv, &pid, NULL);
+	snprintf(pid_buf, sizeof(pid_buf), PID_FMT, (u_long)pid);
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+		__db_msg(env, DB_STR_A("1526",
+		    "%lu: adding self to registry", "%lu"), (u_long)pid);
+
+#if DB_ENVREG_KILL_ALL
+	if (0) {
+kill_all:	/*
+		 * A second pass through the file, this time killing any
+		 * processes still running.
+		 */
+		if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+			return (ret);
+	}
+#endif
+
+	/*
+	 * Read the file.  Skip empty slots, and check that a lock is held
+	 * for any allocated slots.  An allocated slot which we can lock
+	 * indicates a process died holding a handle and recovery needs to
+	 * be run.
+	 */
+	for (lcnt = 0;; ++lcnt) {
+		if ((ret = __os_read(
+		    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+			return (ret);
+		if (nr == 0)
+			break;
+
+		/*
+		 * A partial record at the end of the file is possible if a
+		 * previously un-registered process was interrupted while
+		 * registering.
+		 *
+		 * NOTE(review): `dead' is not assigned on this path, yet the
+		 * failchk and overwrite code below still consult it --
+		 * confirm that is intended.
+		 */
+		if (nr != PID_LEN) {
+			need_recovery = 1;
+			break;
+		}
+
+		if (PID_ISEMPTY(buf)) {
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1527",
+				    "%02u: EMPTY", "%02u"), lcnt);
+			continue;
+		}
+
+		/*
+		 * !!!
+		 * DB_REGISTER is implemented using per-process locking, only
+		 * a single ENV handle may be open per process.  Enforce
+		 * that restriction.
+		 */
+		if (memcmp(buf, pid_buf, PID_LEN) == 0) {
+			__db_errx(env, DB_STR("1528",
+"DB_REGISTER limits processes to one open DB_ENV handle per environment"));
+			return (EINVAL);
+		}
+
+		/* For messages only: skip the padding, drop the newline. */
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) {
+			for (p = buf; *p == ' ';)
+				++p;
+			buf[nr - 1] = '\0';
+		}
+
+#if DB_ENVREG_KILL_ALL
+		if (need_recovery) {
+			pid = (pid_t)strtoul(buf, NULL, 10);
+			(void)kill(pid, SIGKILL);
+
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1529",
+				    "%02u: %s: KILLED", "%02u %s"), lcnt, p);
+			continue;
+		}
+#endif
+		pos = (off_t)lcnt * PID_LEN;
+		if (REGISTRY_LOCK(env, pos, 1) == 0) {
+			/* We got the lock, so nobody owns this slot. */
+			if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
+				return (ret);
+
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1530",
+				    "%02u: %s: FAILED", "%02u %s"), lcnt, p);
+
+			need_recovery = 1;
+			dead = pos;
+#if DB_ENVREG_KILL_ALL
+			goto kill_all;
+#else
+			break;
+#endif
+		} else
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1531",
+				    "%02u: %s: LOCKED", "%02u %s"), lcnt, p);
+	}
+
+	/*
+	 * If we have to perform recovery...
+	 *
+	 * Mark all slots empty.  Registry ignores empty slots we can't lock,
+	 * so it doesn't matter if any of the processes are in the middle of
+	 * exiting Berkeley DB -- they'll discard their lock when they exit.
+	 */
+	if (need_recovery) {
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+			__db_msg(env, "%lu: recovery required", (u_long)pid);
+
+		if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) {
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env,
+				    "%lu: performing failchk", (u_long)pid);
+
+			if (LF_ISSET(DB_FAILCHK_ISALIVE))
+				if ((ret = __envreg_create_active_pid(
+				    env, pid_buf)) != 0)
+					goto sig_proc;
+
+			/* The environment will already exist, so we do not
+			 * want DB_CREATE set, nor do we want any recovery at
+			 * this point.  No need to put values back as flags is
+			 * passed in by value.  Save original dbenv flags in
+			 * case we need to recover/remove existing environment.
+			 * Set DB_ENV_FAILCHK before attach to help ensure we
+			 * don't block on a mutex held by the dead process.
+			 */
+			LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL);
+			orig_flags = dbenv->flags;
+			F_SET(dbenv, DB_ENV_FAILCHK);
+			/* Attach to environment and subsystems. */
+			if ((ret = __env_attach_regions(
+			    dbenv, flags, orig_flags, 0)) != 0)
+				goto sig_proc;
+			if ((t_ret =
+			    __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 &&
+			    ret == 0)
+				ret = t_ret;
+			if ((t_ret =
+			    __env_failchk_int(dbenv)) != 0 && ret == 0)
+				ret = t_ret;
+
+			/*
+			 * Free active pid array if used.  Clear the global
+			 * pointer as well: __envreg_create_active_pid treats
+			 * a non-NULL pointer as a live array, so leaving the
+			 * stale value behind would lead to a second free on
+			 * the next registration in this process.
+			 */
+			if (LF_ISSET(DB_FAILCHK_ISALIVE)) {
+				DB_GLOBAL(num_active_pids) = 0;
+				DB_GLOBAL(size_active_pids) = 0;
+				if (DB_GLOBAL(active_pids) != NULL) {
+					__os_free(env, DB_GLOBAL(active_pids));
+					DB_GLOBAL(active_pids) = NULL;
+				}
+			}
+
+			/* Detach from environment and deregister thread. */
+			if ((t_ret =
+			    __env_refresh(dbenv, orig_flags, 0)) != 0 &&
+			    ret == 0)
+				ret = t_ret;
+			if (ret == 0) {
+				/* failchk worked: just clear the dead slot. */
+				if ((ret = __os_seek(env, dbenv->registry,
+				    0, 0,(u_int32_t)dead)) != 0 ||
+				    (ret = __os_write(env, dbenv->registry,
+				    PID_EMPTY, PID_LEN, &nw)) != 0)
+					return (ret);
+				need_recovery = 0;
+				goto add;
+			}
+
+		}
+		/* If we can't attach, then we cannot set DB_REGISTER panic. */
+sig_proc:	if (__env_attach(env, NULL, 0, 0) == 0) {
+			infop = env->reginfo;
+			renv = infop->primary;
+			/* Indicate DB_REGISTER panic.  Also, set environment
+			 * panic as this is the panic trigger mechanism in
+			 * the code that everything looks for.
+			 */
+			renv->reg_panic = 1;
+			renv->panic = 1;
+			(void)__env_detach(env, 0);
+		}
+
+		/* Wait for processes to see the panic and leave. */
+		__os_yield(env, 0, dbenv->envreg_timeout);
+
+		/* Figure out how big the file is. */
+		if ((ret = __os_ioinfo(
+		    env, NULL, dbenv->registry, &mbytes, &bytes, NULL)) != 0)
+			return (ret);
+		end = (off_t)mbytes * MEGABYTE + bytes;
+
+		/*
+		 * Seek to the beginning of the file and overwrite slots to
+		 * the end of the file.
+		 *
+		 * It's possible for there to be a partial entry at the end of
+		 * the file if a process died when trying to register.  If so,
+		 * correct for it and overwrite it as well.
+		 */
+		if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+			return (ret);
+		for (lcnt = 0; lcnt < ((u_int)end / PID_LEN +
+		    ((u_int)end % PID_LEN == 0 ? 0 : 1)); ++lcnt) {
+
+			if ((ret = __os_read(
+			    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+				return (ret);
+
+			pos = (off_t)lcnt * PID_LEN;
+			/* Do not notify on the dead process. */
+			if (pos != dead) {
+				pid = (pid_t)strtoul(buf, NULL, 10);
+				DB_EVENT(env, DB_EVENT_REG_ALIVE, &pid);
+			}
+
+			/* The read advanced the file; go back to the slot. */
+			if ((ret = __os_seek(env,
+			    dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 ||
+			    (ret = __os_write(env,
+			    dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0)
+				return (ret);
+		}
+		/* Wait one last time to get everyone out. */
+		__os_yield(env, 0, dbenv->envreg_timeout);
+	}
+
+	/*
+	 * Seek to the first process slot and add ourselves to the first empty
+	 * slot we can lock.
+	 */
+add:	if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+		return (ret);
+	for (lcnt = 0;; ++lcnt) {
+		if ((ret = __os_read(
+		    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+			return (ret);
+		/* Only empty slots, short records, or EOF are candidates. */
+		if (nr == PID_LEN && !PID_ISEMPTY(buf))
+			continue;
+		pos = (off_t)lcnt * PID_LEN;
+		if (REGISTRY_LOCK(env, pos, 1) == 0) {
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1532",
+				    "%lu: locking slot %02u at offset %lu",
+				    "%lu %02u %lu"), (u_long)pid, lcnt,
+				    (u_long)pos);
+
+			if ((ret = __os_seek(env,
+			    dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 ||
+			    (ret = __os_write(env,
+			    dbenv->registry, pid_buf, PID_LEN, &nw)) != 0)
+				return (ret);
+			dbenv->registry_off = (u_int32_t)pos;
+			break;
+		}
+	}
+
+	if (need_recovery)
+		*need_recoveryp = 1;
+
+	return (ret);
+}
+
+/*
+ * __envreg_unregister --
+ *	Unregister a ENV handle.
+ *
+ *	Unless recovery failed, overwrite our registry slot (at
+ *	dbenv->registry_off) with PID_EMPTY.  In every case close the registry
+ *	file handle and clear dbenv->registry.  Returns the first error seen.
+ *
+ * PUBLIC: int __envreg_unregister __P((ENV *, int));
+ */
+int
+__envreg_unregister(env, recovery_failed)
+	ENV *env;
+	int recovery_failed;
+{
+	DB_ENV *dbenv;
+	size_t written;
+	int close_ret, ret;
+
+	dbenv = env->dbenv;
+	ret = 0;
+
+	/*
+	 * After a failed recovery we drop our locks but deliberately leave
+	 * our slot filled in, so that a subsequent process cannot conclude
+	 * everything is fine and enter the database environment.
+	 *
+	 * Otherwise, no exclusive lock is needed to discard a ENV handle: we
+	 * mark our slot empty before giving up the slot lock, and threads of
+	 * control reviewing the register file ignore slots they cannot lock.
+	 */
+	if (!recovery_failed) {
+		ret = __os_seek(env,
+		    dbenv->registry, 0, 0, dbenv->registry_off);
+		if (ret == 0)
+			ret = __os_write(env,
+			    dbenv->registry, PID_EMPTY, PID_LEN, &written);
+	}
+
+	/*
+	 * !!!
+	 * Closing the file descriptor is assumed to discard all held locks.
+	 *
+	 * !!!
+	 * There is an ordering concern: for a process that failed in
+	 * recovery, this releases both the exclusive lock and our slot lock.
+	 * Were the OS to release the exclusive lock and let another thread of
+	 * control acquire it before our slot lock is also released, we could
+	 * race.  That is not believed to be possible.
+	 */
+	close_ret = __os_closehandle(env, dbenv->registry);
+	if (ret == 0 && close_ret != 0)
+		ret = close_ret;
+
+	dbenv->registry = NULL;
+	return (ret);
+}
+
+/*
+ * __envreg_xunlock --
+ *	Discard the exclusive lock held by the ENV handle.
+ *
+ *	Returns 0 when the unlock succeeds.  If it fails, the error is
+ *	reported and the result of __env_panic is returned instead.
+ *
+ * PUBLIC: int __envreg_xunlock __P((ENV *));
+ */
+int
+__envreg_xunlock(env)
+	ENV *env;
+{
+	DB_ENV *dbenv;
+	pid_t self;
+	int ret;
+
+	dbenv = env->dbenv;
+	dbenv->thread_id(dbenv, &self, NULL);
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+		__db_msg(env, DB_STR_A("1533",
+		    "%lu: recovery completed, unlocking", "%lu"), (u_long)self);
+
+	ret = REGISTRY_EXCL_UNLOCK(env);
+	if (ret != 0) {
+		/* Failing to release the exclusive lock panics the env. */
+		__db_err(env, ret, DB_STR_A("1534",
+		    "%s: exclusive file unlock", "%s"), REGISTER_FILE);
+		ret = __env_panic(env, ret);
+	}
+	return (ret);
+}
+
+/*
+ * __envreg_pid_compare --
+ *	Compare routine for qsort and bsearch calls.
+ *	Returns a negative value if key is less than membr, 0 if equal and
+ *	a positive value if key is greater than membr.
+ *
+ *	The result is computed by comparison rather than by subtraction so
+ *	that it cannot overflow or be truncated when converted to int,
+ *	whatever the width or signedness of pid_t.
+ */
+static int
+__envreg_pid_compare(key, membr)
+	const void *key;
+	const void *membr;
+{
+	pid_t k, m;
+
+	k = *(const pid_t *)key;
+	m = *(const pid_t *)membr;
+
+	if (k < m)
+		return (-1);
+	return (k > m ? 1 : 0);
+}
+
+/*
+ * __envreg_isalive --
+ *	Default isalive function.  Looks pid up in the sorted array of active
+ *	pids that was gathered from the db_register file.
+ *
+ *	Returns EINVAL if flags is neither 0 nor DB_MUTEX_PROCESS_ONLY,
+ *	1 if pid is present in the array, and 0 otherwise (including when
+ *	there is no array, the array is empty, or dbenv is NULL).
+ *
+ * PUBLIC: int __envreg_isalive
+ * PUBLIC:     __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
+ */
+int
+__envreg_isalive(dbenv, pid, tid, flags)
+	DB_ENV *dbenv;
+	pid_t pid;
+	db_threadid_t tid;
+	u_int32_t flags;
+{
+	void *hit;
+
+	/* The thread id is irrelevant here; touch it only to quiet lint. */
+	DB_THREADID_INIT(tid);
+
+	/* Reject flag values we do not expect. */
+	if (flags != 0 && flags != DB_MUTEX_PROCESS_ONLY)
+		return (EINVAL);
+
+	/* Without a handle or a populated array, nothing is known alive. */
+	if (DB_GLOBAL(active_pids) == NULL ||
+	    DB_GLOBAL(num_active_pids) == 0 || dbenv == NULL)
+		return (0);
+
+	/*
+	 * bsearch yields a pointer to the matching element of active_pids,
+	 * or NULL when pid is absent; map that onto 1 and 0 respectively.
+	 */
+	hit = bsearch(&pid, DB_GLOBAL(active_pids),
+	    DB_GLOBAL(num_active_pids), sizeof(pid_t), __envreg_pid_compare);
+
+	return (hit != NULL ? 1 : 0);
+}
+
+/*
+ * __envreg_create_active_pid --
+ *	Create array of pids, if need more room in array then double size.
+ *	Only add active pids from DB_REGISTER file into array.
+ *
+ *	env:	environment whose registry file is scanned from the start.
+ *	my_pid:	this process' registry-formatted pid; it is stored as the
+ *		first element the first time an active pid is appended to an
+ *		empty array.
+ *
+ *	DB_GLOBAL(size_active_pids) holds the array capacity in ELEMENTS, the
+ *	same unit as DB_GLOBAL(num_active_pids), so that the room check below
+ *	compares like with like.  The array is left sorted for
+ *	__envreg_isalive.  Returns 0 or an error from the OS layer.
+ */
+static int
+__envreg_create_active_pid(env, my_pid)
+	ENV *env;
+	char *my_pid;
+{
+	DB_ENV *dbenv;
+	char buf[PID_LEN + 10];
+	int ret;
+	off_t pos;
+	pid_t pid, *tmparray;
+	size_t newcnt, nr;
+	u_int lcnt;
+
+	dbenv = env->dbenv;
+	pos = 0;
+	ret = 0;
+
+	/*
+	 * Walk through DB_REGISTER file, we grab pid entries that are locked
+	 * as those represent processes that are still alive.  Ignore empty
+	 * slots, or those that are unlocked.
+	 */
+	if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+		return (ret);
+	for (lcnt = 0;; ++lcnt) {
+		if ((ret = __os_read(
+		    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+			return (ret);
+
+		/* All done if we read nothing, or get a partial record. */
+		if (nr != PID_LEN)
+			break;
+		if (PID_ISEMPTY(buf))
+			continue;
+
+		pos = (off_t)lcnt * PID_LEN;
+		if (REGISTRY_LOCK(env, pos, 1) == 0) {
+			/* Got lock, so process died.  Do not add to array. */
+			if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
+				return (ret);
+			continue;
+		}
+
+		/* First, check to make sure we have room in the array. */
+		if (DB_GLOBAL(num_active_pids) + 1 >
+		    DB_GLOBAL(size_active_pids)) {
+			/* Start with 512 entries, then double if must grow. */
+			newcnt = DB_GLOBAL(size_active_pids) > 0 ?
+			    (size_t)DB_GLOBAL(size_active_pids) * 2 : 512;
+			if ((ret = __os_malloc(env,
+			    newcnt * sizeof(pid_t), &tmparray)) != 0)
+				return (ret);
+
+			/*
+			 * If an array exists, copy and free it.  A recorded
+			 * capacity of 0 means there is no live array even if
+			 * the pointer still holds an old value, so do not
+			 * touch the pointer in that case.
+			 */
+			if (DB_GLOBAL(size_active_pids) > 0 &&
+			    DB_GLOBAL(active_pids) != NULL) {
+				memcpy(tmparray,
+				    DB_GLOBAL(active_pids),
+				    DB_GLOBAL(num_active_pids) *
+				    sizeof(pid_t));
+				__os_free(env, DB_GLOBAL(active_pids));
+			}
+
+			DB_GLOBAL(active_pids) = tmparray;
+			DB_GLOBAL(size_active_pids) = newcnt;
+
+			/*
+			 * The process getting here has not been added
+			 * to the DB_REGISTER file yet, so include it
+			 * as the first item in array.  This only happens
+			 * right after the initial 512-entry allocation, so
+			 * there is room for it and for the entry below.
+			 */
+			if (DB_GLOBAL(num_active_pids) == 0) {
+				pid = (pid_t)strtoul(my_pid, NULL, 10);
+				DB_GLOBAL(active_pids)
+				    [DB_GLOBAL(num_active_pids)++] = pid;
+			}
+		}
+
+		/* Insert into array. */
+		pid = (pid_t)strtoul(buf, NULL, 10);
+		DB_GLOBAL(active_pids)
+		    [DB_GLOBAL(num_active_pids)++] = pid;
+	}
+
+	/*
+	 * Sort the array to allow for binary search in the isalive function.
+	 * Nothing to do for fewer than two entries, which also avoids handing
+	 * qsort a pointer to an array that was never allocated.
+	 */
+	if (DB_GLOBAL(num_active_pids) > 1)
+		qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+		    sizeof(pid_t), __envreg_pid_compare);
+	return (ret);
+}