1 files changed, 730 insertions, 0 deletions
diff --git a/src/env/env_register.c b/src/env/env_register.c
new file mode 100644
index 00000000..7475444d
--- /dev/null
+++ b/src/env/env_register.c
@@ -0,0 +1,730 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2004, 2012 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+#define	REGISTER_FILE	"__db.register"		/* Registry file name */
+
+#define	PID_EMPTY	"X                      0\n"	/* Unused PID entry */
+#define	PID_FMT		"%24lu\n"			/* PID entry format */
+							/* True if p is unused */
+#define	PID_ISEMPTY(p)	(memcmp(p, PID_EMPTY, PID_LEN) == 0)
+#define	PID_LEN		(25)				/* Entry length, bytes */
+/* Lock or unlock the registry file at pos; the exclusive lock is pos 1. */
+#define	REGISTRY_LOCK(env, pos, nowait)					\
+	__os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 1, nowait)
+#define	REGISTRY_UNLOCK(env, pos)					\
+	__os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 0, 0)
+#define	REGISTRY_EXCL_LOCK(env, nowait)					\
+	REGISTRY_LOCK(env, 1, nowait)
+#define	REGISTRY_EXCL_UNLOCK(env)					\
+	REGISTRY_UNLOCK(env, 1)
+
+static	int __envreg_add __P((ENV *, int *, u_int32_t));
+static	int __envreg_pid_compare __P((const void *, const void *));
+static	int __envreg_create_active_pid __P((ENV *, char *));
+
+/*
+ * Support for portable, multi-process database environment locking, based on
+ * the Subversion SR (#11511).
+ *
+ * The registry feature is configured by specifying the DB_REGISTER flag to the
+ * DbEnv.open method.  If DB_REGISTER is specified, DB opens the registry file
+ * in the database environment home directory.  The registry file is formatted
+ * as follows:
+ *
+ *	                    12345		# process ID slot 1
+ *	X		# empty slot
+ *	                    12346		# process ID slot 2
+ *	X		# empty slot
+ *	                    12347		# process ID slot 3
+ *	                    12348		# process ID slot 4
+ *	X                   12349		# empty slot
+ *	X		# empty slot
+ *
+ * All lines are fixed-length.  All lines are process ID slots.  Empty slots
+ * are marked with leading non-digit characters.
+ *
+ * To modify the file, you get an exclusive lock on the first byte of the file.
+ *
+ * While holding any DbEnv handle, each process has an exclusive lock on the
+ * first byte of a process ID slot.  There is a restriction on having more
+ * than one DbEnv handle open at a time, because Berkeley DB uses per-process
+ * locking to implement this feature, that is, a process may never have more
+ * than a single slot locked.
+ *
+ * This work requires that if a process dies or the system crashes, locks held
+ * by the dying processes will be dropped.  (We can't use system shared
+ * memory-backed or filesystem-backed locks because they're persistent when a
+ * process dies.)  On POSIX systems, we use fcntl(2) locks; on Win32 we have
+ * LockFileEx/UnlockFile, except for Win/9X and Win/ME which have to loop on
+ * LockFile/UnlockFile.
+ *
+ * We could implement the same solution with flock locking instead of fcntl,
+ * but flock would require a separate file for each process of control (and
+ * probably each DbEnv handle) in the database environment, which is fairly
+ * ugly.
+ *
+ * Whenever a process opens a new DbEnv handle, it walks the registry file and
+ * verifies it CANNOT acquire the lock for any non-empty slot.  If a lock for
+ * a non-empty slot is available, we know a process died holding an open handle,
+ * and recovery needs to be run.
+ *
+ * It's possible to get corruption in the registry file.  If a write system
+ * call fails after partially completing, there can be corrupted entries in
+ * the registry file, or a partial entry at the end of the file.  This is OK.
+ * A corrupted entry will be flagged as a non-empty line during the registry
+ * file walk.  Since the line was corrupted by process failure, no process will
+ * hold a lock on the slot, which will lead to recovery being run.
+ *
+ * There can still be processes running in the environment when we recover it,
+ * and, in fact, there can still be processes running in the old environment
+ * after we're up and running in a new one.  This is safe because performing
+ * recovery panics (and removes) the existing environment, so the window of
+ * vulnerability is small.  Further, we check the panic flag (in the DB API
+ * methods, when waking from spinning on a mutex, and whenever we're about to
+ * write to disk).  The only window of corruption is if the write check of the
+ * panic were to complete, the region subsequently be recovered, and then the
+ * write continues.  That's very, very unlikely to happen.  This vulnerability
+ * already exists in Berkeley DB, too; the registry code doesn't make it any
+ * worse than it already is.
+ *
+ * The only way to avoid that window entirely is to ensure that all processes
+ * in the Berkeley DB environment exit before we run recovery.   Applications
+ * can do that if they maintain their own process registry outside of Berkeley
+ * DB, but it's a little more difficult to do here.   The obvious approach is
+ * to send signals to any process using the database environment as soon as we
+ * decide to run recovery, but there are problems with that approach: we might
+ * not have permission to send signals to the process, the process might have
+ * signal handlers installed, the cookie stored might not be the same as kill's
+ * argument, we may not be able to reliably tell if the process died, and there
+ * are probably other problems.  However, if we can send a signal, it reduces
+ * the window, and so we include the code here.  To configure it, turn on the
+ * DB_ENVREG_KILL_ALL #define.
+ */
+#define	DB_ENVREG_KILL_ALL	0	/* 1 enables the SIGKILL pass */
+
+/*
+ * __envreg_register --
+ *	Open the registry file, lock it and add this process to it.  On
+ *	success, *need_recoveryp says whether the caller must run recovery.
+ *
+ * PUBLIC: int __envreg_register __P((ENV *, int *, u_int32_t));
+ */
+int
+__envreg_register(env, need_recoveryp, flags)
+	ENV *env;
+	int *need_recoveryp;
+	u_int32_t flags;
+{
+	DB_ENV *dbenv;
+	pid_t self;
+	u_int32_t size_bytes, size_mbytes;
+	int ret;
+	char *path;
+
+	dbenv = env->dbenv;
+	path = NULL;
+	*need_recoveryp = 0;
+	dbenv->thread_id(dbenv, &self, NULL);
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+		__db_msg(env, DB_STR_A("1524",
+		    "%lu: register environment", "%lu"), (u_long)self);
+
+	/* Resolve the registry file's path, then open or create the file. */
+	ret = __db_appname(env, DB_APP_NONE, REGISTER_FILE, NULL, &path);
+	if (ret != 0)
+		goto err;
+	ret = __os_open(env,
+	    path, 0, DB_OSO_CREATE, DB_MODE_660, &dbenv->registry);
+	if (ret != 0)
+		goto err;
+
+	/*
+	 * Block until we hold the exclusive registry lock.
+	 *
+	 * !!!
+	 * In a new file the locked byte does not exist yet; the original
+	 * author believed that to be harmless.
+	 */
+	ret = REGISTRY_EXCL_LOCK(env, 0);
+	if (ret != 0)
+		goto err;
+
+	/*
+	 * A zero-length file is one we just created: report that and ask for
+	 * recovery.  That lets a user clean up the system by removing the
+	 * registry file and restarting the application.
+	 */
+	ret = __os_ioinfo(
+	    env, path, dbenv->registry, &size_mbytes, &size_bytes, NULL);
+	if (ret != 0)
+		goto err;
+	if (size_mbytes == 0 && size_bytes == 0) {
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+			__db_msg(env, DB_STR_A("1525",
+			    "%lu: creating %s", "%lu %s"), (u_long)self, path);
+		*need_recoveryp = 1;
+	}
+
+	/* Add this process to the registry. */
+	ret = __envreg_add(env, need_recoveryp, flags);
+	if (ret != 0)
+		goto err;
+
+	/*
+	 * Keep the exclusive lock when recovery is needed: ENV->open calls
+	 * back into the register code once recovery has completed.
+	 * Otherwise drop the lock now.
+	 */
+	if (*need_recoveryp == 0) {
+		ret = REGISTRY_EXCL_UNLOCK(env);
+		if (ret != 0)
+			goto err;
+	}
+	goto done;
+
+	/* !!! Closing the file handle must release all of our locks. */
+err:	*need_recoveryp = 0;
+	if (dbenv->registry != NULL)
+		(void)__os_closehandle(env, dbenv->registry);
+	dbenv->registry = NULL;
+
+done:	if (path != NULL)
+		__os_free(env, path);
+	return (ret);
+}
+
+/*
+ * __envreg_add --
+ *	Add the process' pid to the register.  First check that every in-use
+ *	slot is still locked by its owner.  If one is not (or a partial record
+ *	is found), try failchk when DB_FAILCHK or DB_FAILCHK_ISALIVE is set;
+ *	failing that, panic the environment and empty every slot.  Then claim
+ *	the first empty slot we can lock.  *need_recoveryp is set to 1 when
+ *	the caller must run recovery; returns 0 on success or an error code.
+ */
+static int
+__envreg_add(env, need_recoveryp, flags)
+	ENV *env;
+	int *need_recoveryp;
+	u_int32_t flags;
+{
+	DB_ENV *dbenv;
+	DB_THREAD_INFO *ip;
+	REGENV * renv;
+	REGINFO *infop;
+	pid_t pid, slot_pid;
+	off_t end, pos, dead;
+	size_t nr, nw;
+	u_int lcnt;
+	u_int32_t bytes, mbytes, orig_flags;
+	int need_recovery, ret, t_ret;
+	char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10];
+
+	dbenv = env->dbenv;
+	need_recovery = 0;
+	COMPQUIET(dead, 0);
+	COMPQUIET(p, NULL);
+	ip = NULL;
+
+	/* Get a copy of our process ID. */
+	dbenv->thread_id(dbenv, &pid, NULL);
+	snprintf(pid_buf, sizeof(pid_buf), PID_FMT, (u_long)pid);
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+		__db_msg(env, DB_STR_A("1526",
+		    "%lu: adding self to registry", "%lu"), (u_long)pid);
+
+#if DB_ENVREG_KILL_ALL
+	if (0) {
+kill_all:	/*
+		 * A second pass through the file, this time killing any
+		 * processes still running.
+		 */
+		if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+			return (ret);
+	}
+#endif
+
+	/*
+	 * Read the file.  Skip empty slots, and check that a lock is held
+	 * for any allocated slots.  An allocated slot which we can lock
+	 * indicates a process died holding a handle and recovery needs to
+	 * be run.
+	 */
+	for (lcnt = 0;; ++lcnt) {
+		if ((ret = __os_read(
+		    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+			return (ret);
+		if (nr == 0)
+			break;
+
+		/*
+		 * A partial record at the end of the file is possible if a
+		 * process was interrupted while registering.  NOTE(review):
+		 * "dead" keeps its initial value (0) on this path -- confirm.
+		 */
+		if (nr != PID_LEN) {
+			need_recovery = 1;
+			break;
+		}
+
+		if (PID_ISEMPTY(buf)) {
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1527",
+				    "%02u: EMPTY", "%02u"), lcnt);
+			continue;
+		}
+
+		/*
+		 * !!!
+		 * DB_REGISTER is implemented using per-process locking, only
+		 * a single ENV handle may be open per process.  Enforce
+		 * that restriction.
+		 */
+		if (memcmp(buf, pid_buf, PID_LEN) == 0) {
+			__db_errx(env, DB_STR("1528",
+"DB_REGISTER limits processes to one open DB_ENV handle per environment"));
+			return (EINVAL);
+		}
+
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) {
+			for (p = buf; *p == ' ';)
+				++p;
+			buf[nr - 1] = '\0';
+		}
+
+#if DB_ENVREG_KILL_ALL
+		if (need_recovery) {
+			slot_pid = (pid_t)strtoul(buf, NULL, 10);
+			(void)kill(slot_pid, SIGKILL);
+
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1529",
+				    "%02u: %s: KILLED", "%02u %s"), lcnt, p);
+			continue;
+		}
+#endif
+		pos = (off_t)lcnt * PID_LEN;
+		if (REGISTRY_LOCK(env, pos, 1) == 0) {
+			if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
+				return (ret);
+
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1530",
+				    "%02u: %s: FAILED", "%02u %s"), lcnt, p);
+
+			need_recovery = 1;
+			dead = pos;
+#if DB_ENVREG_KILL_ALL
+			goto kill_all;
+#else
+			break;
+#endif
+		} else
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1531",
+				    "%02u: %s: LOCKED", "%02u %s"), lcnt, p);
+	}
+
+	/*
+	 * If we have to perform recovery, mark all slots empty.  Registry
+	 * ignores empty slots we can't lock, so it doesn't matter if any of
+	 * the processes are in the middle of exiting Berkeley DB -- they'll
+	 * discard their lock when they exit.
+	 */
+	if (need_recovery) {
+		if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+			__db_msg(env, "%lu: recovery required", (u_long)pid);
+
+		if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) {
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env,
+				    "%lu: performing failchk", (u_long)pid);
+
+			if (LF_ISSET(DB_FAILCHK_ISALIVE))
+				if ((ret = __envreg_create_active_pid(
+				    env, pid_buf)) != 0)
+					goto sig_proc;
+
+			/* The environment already exists, so we want neither
+			 * DB_CREATE nor recovery here.  flags is passed by
+			 * value, so nothing need be restored.  Save the dbenv
+			 * flags in case we must recover or remove the
+			 * environment.  Set DB_ENV_FAILCHK before attaching so
+			 * we don't block on a mutex the dead process holds.
+			 */
+			LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL);
+			orig_flags = dbenv->flags;
+			F_SET(dbenv, DB_ENV_FAILCHK);
+			/* Attach to environment and subsystems. */
+			if ((ret = __env_attach_regions(
+			    dbenv, flags, orig_flags, 0)) != 0)
+				goto sig_proc;
+			if ((t_ret =
+			    __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 &&
+			    ret == 0)
+				ret = t_ret;
+			if ((t_ret =
+			    __env_failchk_int(dbenv)) != 0 && ret == 0)
+				ret = t_ret;
+
+			/* Free active pid array, leave no stale pointer. */
+			if (LF_ISSET(DB_FAILCHK_ISALIVE)) {
+				DB_GLOBAL(num_active_pids) = 0;
+				DB_GLOBAL(size_active_pids) = 0;
+				__os_free( env, DB_GLOBAL(active_pids));
+				DB_GLOBAL(active_pids) = NULL;
+			}
+
+			/* Detach from environment and deregister thread. */
+			if ((t_ret =
+			    __env_refresh(dbenv, orig_flags, 0)) != 0 &&
+			    ret == 0)
+				ret = t_ret;
+			if (ret == 0) {
+				if ((ret = __os_seek(env, dbenv->registry,
+				    0, 0,(u_int32_t)dead)) != 0 ||
+				    (ret = __os_write(env, dbenv->registry,
+				    PID_EMPTY, PID_LEN, &nw)) != 0)
+					return (ret);
+				need_recovery = 0;
+				goto add;
+			}
+		}
+		/* If we can't attach, then we cannot set DB_REGISTER panic. */
+sig_proc:	if (__env_attach(env, NULL, 0, 0) == 0) {
+			infop = env->reginfo;
+			renv = infop->primary;
+			/* Indicate DB_REGISTER panic, and set the environment
+			 * panic too: that is the trigger everything looks for.
+			 */
+			renv->reg_panic = 1;
+			renv->panic = 1;
+			(void)__env_detach(env, 0);
+		}
+
+		/* Wait for processes to see the panic and leave. */
+		__os_yield(env, 0, dbenv->envreg_timeout);
+
+		/* FIGURE out how big the file is. */
+		if ((ret = __os_ioinfo(
+		    env, NULL, dbenv->registry, &mbytes, &bytes, NULL)) != 0)
+			return (ret);
+		end = (off_t)mbytes * MEGABYTE + bytes;
+
+		/*
+		 * Seek to the beginning of the file and overwrite slots to
+		 * the end of the file.  It's possible for there to be a
+		 * partial entry at the end of the file if a process died when
+		 * trying to register.  If so, correct for it and overwrite it
+		 * as well.
+		 */
+		if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+			return (ret);
+		for (lcnt = 0; lcnt < ((u_int)end / PID_LEN +
+		    ((u_int)end % PID_LEN == 0 ? 0 : 1)); ++lcnt) {
+			if ((ret = __os_read(
+			    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+				return (ret);
+
+			pos = (off_t)lcnt * PID_LEN;
+			/* Don't notify on dead process; pid must stay ours. */
+			if (pos != dead) {
+				slot_pid = (pid_t)strtoul(buf, NULL, 10);
+				DB_EVENT(env, DB_EVENT_REG_ALIVE, &slot_pid);
+			}
+
+			if ((ret = __os_seek(env,
+			    dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 ||
+			    (ret = __os_write(env,
+			    dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0)
+				return (ret);
+		}
+		/* wait one last time to get everyone out */
+		__os_yield(env, 0, dbenv->envreg_timeout);
+	}
+
+	/*
+	 * Seek to the first process slot and add ourselves to the first empty
+	 * slot we can lock.
+	 */
+add:	if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+		return (ret);
+	for (lcnt = 0;; ++lcnt) {
+		if ((ret = __os_read(
+		    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+			return (ret);
+		if (nr == PID_LEN && !PID_ISEMPTY(buf))
+			continue;
+		pos = (off_t)lcnt * PID_LEN;
+		if (REGISTRY_LOCK(env, pos, 1) == 0) {
+			if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+				__db_msg(env, DB_STR_A("1532",
+				    "%lu: locking slot %02u at offset %lu",
+				    "%lu %02u %lu"), (u_long)pid, lcnt,
+				    (u_long)pos);
+
+			if ((ret = __os_seek(env,
+			    dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 ||
+			    (ret = __os_write(env,
+			    dbenv->registry, pid_buf, PID_LEN, &nw)) != 0)
+				return (ret);
+			dbenv->registry_off = (u_int32_t)pos;
+			break;
+		}
+	}
+
+	if (need_recovery)
+		*need_recoveryp = 1;
+
+	return (ret);
+}
+
+/*
+ * __envreg_unregister --
+ *	Drop this process' registration and close the registry file.  Returns
+ *	the first error seen, or 0.
+ *
+ * PUBLIC: int __envreg_unregister __P((ENV *, int));
+ */
+int
+__envreg_unregister(env, recovery_failed)
+	ENV *env;
+	int recovery_failed;
+{
+	DB_ENV *dbenv;
+	size_t written;
+	int close_ret, ret;
+
+	dbenv = env->dbenv;
+	ret = 0;
+
+	/*
+	 * When recovery failed, leave our slot filled in and just discard
+	 * our locks: a subsequent process must not decide everything is fine
+	 * and try to get into the database environment.
+	 *
+	 * Otherwise mark our slot empty.  No exclusive lock is necessary to
+	 * do so: the slot is marked empty before its lock is discarded, and
+	 * threads of control reviewing the register file ignore any slots
+	 * which they can't lock.  A failure here is remembered, but the handle
+	 * is still closed.
+	 */
+	if (!recovery_failed) {
+		ret = __os_seek(env,
+		    dbenv->registry, 0, 0, dbenv->registry_off);
+		if (ret == 0)
+			ret = __os_write(env,
+			    dbenv->registry, PID_EMPTY, PID_LEN, &written);
+	}
+
+	/*
+	 * !!!
+	 * This code assumes that closing the file descriptor discards all
+	 * held locks.
+	 *
+	 * !!!
+	 * There is an ordering problem here -- in the case of a process that
+	 * failed in recovery, we're unlocking both the exclusive lock and our
+	 * slot lock.  If the OS unlocked the exclusive lock and then allowed
+	 * another thread of control to acquire the exclusive lock before also
+	 * releasing our slot lock, we could race.  That can't happen, I
+	 * don't think.
+	 *
+	 * A close failure is reported only if nothing failed before it.
+	 */
+	close_ret = __os_closehandle(env, dbenv->registry);
+	if (ret == 0 && close_ret != 0)
+		ret = close_ret;
+
+	dbenv->registry = NULL;
+	return (ret);
+}
+
+/*
+ * __envreg_xunlock --
+ *	Release the exclusive registry lock; panic the environment on failure.
+ *
+ * PUBLIC: int __envreg_xunlock __P((ENV *));
+ */
+int
+__envreg_xunlock(env)
+	ENV *env;
+{
+	DB_ENV *dbenv;
+	pid_t pid;
+	int ret;
+
+	dbenv = env->dbenv;
+	dbenv->thread_id(dbenv, &pid, NULL);
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+		__db_msg(env, DB_STR_A("1533",
+		    "%lu: recovery completed, unlocking", "%lu"), (u_long)pid);
+
+	if ((ret = REGISTRY_EXCL_UNLOCK(env)) != 0) {
+		__db_err(env, ret, DB_STR_A("1534",
+		    "%s: exclusive file unlock", "%s"), REGISTER_FILE);
+		ret = __env_panic(env, ret);
+	}
+	return (ret);
+}
+
+/*
+ * __envreg_pid_compare --
+ *	qsort/bsearch compare routine: returns neg, 0 or pos as key is less
+ *	than, equal to or greater than membr, without overflowing.
+ */
+static int
+__envreg_pid_compare(key, membr)
+	const void *key;
+	const void *membr;
+{
+	const pid_t k = *(const pid_t *)key, m = *(const pid_t *)membr;
+	return ((k > m) - (k < m));
+}
+
+/*
+ * __envreg_isalive --
+ *	Default isalive function: a process is considered alive if its pid
+ *	is in the array of active pids gathered from the db_register file.
+ *	Returns 1 if pid is in the array, 0 if it is not (or there is no
+ *	array), and EINVAL if flags is neither 0 nor
+ *	DB_MUTEX_PROCESS_ONLY.
+ *
+ * PUBLIC: int __envreg_isalive
+ * PUBLIC:   __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
+ */
+int
+__envreg_isalive(dbenv, pid, tid, flags )
+	DB_ENV *dbenv;
+	pid_t pid;
+	db_threadid_t tid;
+	u_int32_t flags;
+{
+	void *match;
+
+	/* The tid is irrelevant here; touch it only to keep lint quiet. */
+	DB_THREADID_INIT(tid);
+
+	/* Only these two flags values are expected. */
+	if (flags != 0 && flags != DB_MUTEX_PROCESS_ONLY)
+		return (EINVAL);
+
+	/* Without a handle or a populated array, nothing is known alive. */
+	if (dbenv == NULL || DB_GLOBAL(active_pids) == NULL ||
+	    DB_GLOBAL(num_active_pids) == 0)
+		return (0);
+
+	/* bsearch yields the matching entry of active_pids, else NULL. */
+	match = bsearch(&pid, DB_GLOBAL(active_pids),
+	    DB_GLOBAL(num_active_pids), sizeof(pid_t), __envreg_pid_compare);
+	return (match == NULL ? 0 : 1);
+}
+
+/*
+ * __envreg_create_active_pid --
+ *	Fill the global active_pids array with the pid of every registry slot
+ *	that is non-empty and still locked by its owner, then sort it for
+ *	__envreg_isalive's binary search.  my_pid is this process' pid in
+ *	registry-entry format.  Returns 0, or the error from a failed call.
+ */
+static int
+__envreg_create_active_pid(env, my_pid)
+	ENV *env;
+	char *my_pid;
+{
+	DB_ENV *dbenv;
+	char buf[PID_LEN + 10];
+	int    ret;
+	off_t  pos;
+	pid_t  pid, *tmparray;
+	size_t tmpsize, nr;
+	u_int lcnt;
+
+	dbenv = env->dbenv;
+	pos = 0;
+	ret = 0;
+
+	/*
+	 * Walk through DB_REGISTER file, we grab pid entries that are locked
+	 * as those represent processes that are still alive.   Ignore empty
+	 * slots, or those that are unlocked.
+	 */
+	if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
+		return (ret);
+	for (lcnt = 0;; ++lcnt) {
+		if ((ret = __os_read(
+		    env, dbenv->registry, buf, PID_LEN, &nr)) != 0)
+			return (ret);
+
+		/* All done if we read nothing, or get a partial record. */
+		if (nr != PID_LEN)
+			break;
+		if (PID_ISEMPTY(buf))
+			continue;
+
+		pos = (off_t)lcnt * PID_LEN;
+		if (REGISTRY_LOCK(env, pos, 1) == 0) {
+			/* Got the lock, so the process died: skip it. */
+			if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
+				return (ret);
+			continue;
+		}
+
+		/* First, make sure there is room in the array. */
+		if (DB_GLOBAL(num_active_pids) + 1 >
+		    DB_GLOBAL(size_active_pids)) {
+			/* Start with 512 bytes, then double if must grow. */
+			tmpsize = DB_GLOBAL(size_active_pids) * sizeof(pid_t);
+			tmpsize = tmpsize > 0 ? tmpsize * 2 : 512;
+			if ((ret = __os_malloc(env, tmpsize, &tmparray)) != 0)
+				return (ret);
+
+			/* If the array exists, then copy and free. */
+			if (DB_GLOBAL(active_pids) != NULL) {
+				memcpy(tmparray, DB_GLOBAL(active_pids),
+				    DB_GLOBAL(num_active_pids) *
+				    sizeof(pid_t));
+				__os_free(env, DB_GLOBAL(active_pids));
+			}
+			DB_GLOBAL(active_pids) = tmparray;
+			/*
+			 * size_active_pids counts entries, not bytes: storing
+			 * the byte count here would let the array be overrun.
+			 */
+			DB_GLOBAL(size_active_pids) = tmpsize / sizeof(pid_t);
+
+			/*
+			 * The process getting here has not been added to the
+			 * DB_REGISTER file yet, so include it as the first
+			 * item in the array.
+			 */
+			if (DB_GLOBAL(num_active_pids) == 0) {
+				pid = (pid_t)strtoul(my_pid, NULL, 10);
+				DB_GLOBAL(active_pids)
+				    [DB_GLOBAL(num_active_pids)++] = pid;
+			}
+		}
+
+		/* Insert into the array. */
+		pid = (pid_t)strtoul(buf, NULL, 10);
+		DB_GLOBAL(active_pids)[DB_GLOBAL(num_active_pids)++] = pid;
+	}
+
+	/* Sort the array to allow for binary search in the isalive func. */
+	if (DB_GLOBAL(num_active_pids) > 1)
+		qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+		    sizeof(pid_t), __envreg_pid_compare);
+	return (ret);
+}