summaryrefslogtreecommitdiff
path: root/src/mutex
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@baserock.org>2015-02-17 17:25:57 +0000
committer <>2015-03-17 16:26:24 +0000
commit780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch)
tree598f8b9fa431b228d29897e798de4ac0c1d3d970 /src/mutex
parent7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff)
downloadberkeleydb-master.tar.gz
Imported from /home/lorry/working-area/delta_berkeleydb/db-6.1.23.tar.gz.HEADdb-6.1.23master
Diffstat (limited to 'src/mutex')
-rw-r--r--src/mutex/mut_alloc.c234
-rw-r--r--src/mutex/mut_failchk.c203
-rw-r--r--src/mutex/mut_fcntl.c248
-rw-r--r--src/mutex/mut_method.c29
-rw-r--r--src/mutex/mut_pthread.c275
-rw-r--r--src/mutex/mut_region.c68
-rw-r--r--src/mutex/mut_stat.c119
-rw-r--r--src/mutex/mut_stub.c12
-rw-r--r--src/mutex/mut_tas.c228
-rw-r--r--src/mutex/mut_win32.c188
-rw-r--r--src/mutex/test_mutex.c89
-rw-r--r--src/mutex/uts4_cc.s2
12 files changed, 1078 insertions, 617 deletions
diff --git a/src/mutex/mut_alloc.c b/src/mutex/mut_alloc.c
index 5df3de53..06b3541e 100644
--- a/src/mutex/mut_alloc.c
+++ b/src/mutex/mut_alloc.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -9,6 +9,9 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/log.h"
+
+static char *__mutex_action_print __P((MUTEX_ACTION));
/*
* __mutex_alloc --
@@ -35,8 +38,7 @@ __mutex_alloc(env, alloc_id, flags, indxp)
if (alloc_id != MTX_APPLICATION && alloc_id != MTX_MUTEX_TEST &&
(F_ISSET(env->dbenv, DB_ENV_NOLOCKING) ||
(!F_ISSET(env, ENV_THREAD) &&
- (LF_ISSET(DB_MUTEX_PROCESS_ONLY) ||
- F_ISSET(env, ENV_PRIVATE)))))
+ (LF_ISSET(DB_MUTEX_PROCESS_ONLY) || F_ISSET(env, ENV_PRIVATE)))))
return (0);
/* Private environments never share mutexes. */
@@ -109,13 +111,17 @@ nomem: __db_errx(env, DB_STR("2034",
mtxregion->stat.st_mutex_max)
cnt = mtxregion->stat.st_mutex_max -
mtxregion->stat.st_mutex_cnt;
+
+ /* Set i to the first newly created db_mutex_t. */
if (F_ISSET(env, ENV_PRIVATE)) {
F_SET(&mtxmgr->reginfo, REGION_TRACKED);
while (__env_alloc(&mtxmgr->reginfo,
(cnt * mtxregion->mutex_size) +
- mtxregion->stat.st_mutex_align, &i) != 0)
- if ((cnt >> 1) == 0)
+ mtxregion->stat.st_mutex_align, &i) != 0) {
+ cnt >>= 1;
+ if (cnt == 0)
break;
+ }
F_CLR(&mtxmgr->reginfo, REGION_TRACKED);
i = (db_mutex_t)ALIGNP_INC(i,
mtxregion->stat.st_mutex_align);
@@ -130,21 +136,16 @@ nomem: __db_errx(env, DB_STR("2034",
}
if (cnt == 0)
goto nomem;
- mutexp = MUTEXP_SET(env, i);
+
mtxregion->stat.st_mutex_free = cnt;
mtxregion->mutex_next = i;
mtxregion->stat.st_mutex_cnt += cnt;
- while (--cnt > 0) {
- mutexp->flags = 0;
- if (F_ISSET(env, ENV_PRIVATE))
- mutexp->mutex_next_link =
- (uintptr_t)(mutexp + 1);
- else
- mutexp->mutex_next_link = ++i;
- mutexp++;
- }
- mutexp->flags = 0;
- mutexp->mutex_next_link = MUTEX_INVALID;
+
+ /*
+ * Now link the rest of the newly allocated db_mutex_t's into
+ * the free list.
+ */
+ MUTEX_BULK_INIT(env, mtxregion, i, cnt);
}
*indxp = mtxregion->mutex_next;
@@ -158,14 +159,12 @@ nomem: __db_errx(env, DB_STR("2034",
if (mtxregion->stat.st_mutex_inuse > mtxregion->stat.st_mutex_inuse_max)
mtxregion->stat.st_mutex_inuse_max =
mtxregion->stat.st_mutex_inuse;
- if (locksys)
- MUTEX_SYSTEM_UNLOCK(env);
/* Initialize the mutex. */
memset(mutexp, 0, sizeof(*mutexp));
F_SET(mutexp, DB_MUTEX_ALLOCATED |
- LF_ISSET(DB_MUTEX_LOGICAL_LOCK |
- DB_MUTEX_PROCESS_ONLY | DB_MUTEX_SHARED));
+ LF_ISSET(DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_PROCESS_ONLY |
+ DB_MUTEX_SELF_BLOCK | DB_MUTEX_SHARED));
/*
* If the mutex is associated with a single process, set the process
@@ -182,7 +181,9 @@ nomem: __db_errx(env, DB_STR("2034",
#endif
if ((ret = __mutex_init(env, *indxp, flags)) != 0)
- (void)__mutex_free_int(env, locksys, indxp);
+ (void)__mutex_free_int(env, 0, indxp);
+ if (locksys)
+ MUTEX_SYSTEM_UNLOCK(env);
return (ret);
}
@@ -262,6 +263,44 @@ __mutex_free_int(env, locksys, indxp)
return (ret);
}
+#ifdef HAVE_FAILCHK_BROADCAST
+/*
+ * __mutex_died --
+ * Announce that a mutex request couldn't be granted because the last
+ * thread to own it was killed by failchk. Sets ENV_DEAD_MUTEX in the
+ * possibly shared environment so that mutex unlock calls don't complain.
+ *
+ *
+ * PUBLIC: int __mutex_died __P((ENV *, db_mutex_t));
+ */
+int
+__mutex_died(env, mutex)
+ ENV *env;
+ db_mutex_t mutex;
+{
+ DB_ENV *dbenv;
+ DB_EVENT_MUTEX_DIED_INFO info;
+ DB_MUTEX *mutexp;
+ char tidstr[DB_THREADID_STRLEN], failmsg[DB_FAILURE_SYMPTOM_SIZE];
+
+ dbenv = env->dbenv;
+
+ mutexp = MUTEXP_SET(env, mutex);
+ info.mutex = mutex;
+ info.pid = mutexp->pid;
+ info.tid = mutexp->tid;
+ (void)dbenv->thread_id_string(dbenv, mutexp->pid, mutexp->tid, tidstr);
+ (void)__mutex_describe(env, mutex, info.desc);
+ (void)snprintf(failmsg, sizeof(failmsg), DB_STR_A("2073",
+ "Mutex died: %s owned %s", "%s %s"), tidstr, info.desc);
+ __db_errx(env, "%s", failmsg);
+ /* If this is the first crashed process, save its description. */
+ (void)__env_failure_remember(env, failmsg);
+ DB_EVENT(env, DB_EVENT_MUTEX_DIED, &info);
+ return (__env_panic(env, USR_ERR(env, DB_RUNRECOVERY)));
+}
+#endif
+
/*
* __mutex_refresh --
* Reinitialize a mutex, if we are not sure of its state.
@@ -289,3 +328,154 @@ __mutex_refresh(env, mutex)
}
return (ret);
}
+
+/*
+ * __mutex_record_lock --
+ * Record that this thread is about to lock a latch.
+ * The last parameter is updated to point to this mutex's entry in the
+ * per-thread mutex state array, so that it can update it if it gets the
+ * mutex, or free it if the mutex is not acquired (e.g. it times out).
+ * Mutexes which can be unlocked by other threads are not placed in this
+ * list, because it would be too costly for that other thread to find
+ * the right slot to clear. The caller has already checked that thread
+ * tracking is enabled.
+ *
+ * PUBLIC: int __mutex_record_lock
+ * PUBLIC: __P((ENV *, db_mutex_t, MUTEX_ACTION, MUTEX_STATE **));
+ */
+int
+__mutex_record_lock(env, mutex, action, retp)
+ ENV *env;
+ db_mutex_t mutex;
+ MUTEX_ACTION action;
+ MUTEX_STATE **retp;
+{
+ DB_MUTEX *mutexp;
+ DB_THREAD_INFO *ip;
+ int i, ret;
+
+ *retp = NULL;
+ mutexp = MUTEXP_SET(env, mutex);
+ if (!F_ISSET(mutexp, DB_MUTEX_SHARED))
+ return (0);
+ if ((ret = __env_set_state(env, &ip, THREAD_VERIFY)) != 0)
+ return (ret);
+ for (i = 0; i != MUTEX_STATE_MAX; i++) {
+ if (ip->dbth_latches[i].action == MUTEX_ACTION_UNLOCKED) {
+ ip->dbth_latches[i].mutex = mutex;
+ ip->dbth_latches[i].action = action;
+#ifdef DIAGNOSTIC
+ __os_gettime(env, &ip->dbth_latches[i].when, 0);
+#endif
+ *retp = &ip->dbth_latches[i];
+ return (0);
+ }
+ }
+ __db_errx(env, DB_STR_A("2074",
+ "No space available in latch table for %lu", "%lu"), (u_long)mutex);
+ (void)__mutex_record_print(env, ip);
+ return (__env_panic(env, USR_ERR(env, DB_RUNRECOVERY)));
+}
+
+/*
+ * __mutex_record_unlock --
+ * Verify that this thread owns the mutex it is about to unlock.
+ *
+ * PUBLIC: int __mutex_record_unlock __P((ENV *, db_mutex_t));
+ */
+int
+__mutex_record_unlock(env, mutex)
+ ENV *env;
+ db_mutex_t mutex;
+{
+ DB_MUTEX *mutexp;
+ DB_THREAD_INFO *ip;
+ int i, ret;
+
+ if (env->thr_hashtab == NULL)
+ return (0);
+ mutexp = MUTEXP_SET(env, mutex);
+ if (!F_ISSET(mutexp, DB_MUTEX_SHARED))
+ return (0);
+ if ((ret = __env_set_state(env, &ip, THREAD_VERIFY)) != 0)
+ return (ret);
+ for (i = 0; i != MUTEX_STATE_MAX; i++) {
+ if (ip->dbth_latches[i].mutex == mutex &&
+ ip->dbth_latches[i].action != MUTEX_ACTION_UNLOCKED) {
+ ip->dbth_latches[i].action = MUTEX_ACTION_UNLOCKED;
+ return (0);
+ }
+ }
+ (void)__mutex_record_print(env, ip);
+ if (ip->dbth_state == THREAD_FAILCHK) {
+ DB_DEBUG_MSG(env, "mutex_record_unlock %lu by failchk thread",
+ (u_long)mutex);
+ return (0);
+ }
+ __db_errx(env, DB_STR_A("2075",
+ "Latch %lu was not held", "%lu"), (u_long)mutex);
+ return (__env_panic(env, USR_ERR(env, DB_RUNRECOVERY)));
+}
+
+static char *
+__mutex_action_print(action)
+ MUTEX_ACTION action;
+{
+ switch (action) {
+ case MUTEX_ACTION_UNLOCKED:
+ return ("unlocked");
+ case MUTEX_ACTION_INTEND_SHARE:
+ return ("waiting to share");
+ case MUTEX_ACTION_SHARED:
+ return ("sharing");
+ default:
+ return ("unknown");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * __mutex_record_print --
+ * Display the thread's mutex state via __db_msg(), including any
+ * information which would be relevant for db_stat or diagnostic messages.
+ *
+ * PUBLIC: int __mutex_record_print __P((ENV *, DB_THREAD_INFO *));
+ */
+int
+__mutex_record_print(env, ip)
+ ENV *env;
+ DB_THREAD_INFO *ip;
+{
+ DB_MSGBUF mb, *mbp;
+ db_mutex_t mutex;
+ int i;
+ char desc[DB_MUTEX_DESCRIBE_STRLEN];
+ char time_buf[CTIME_BUFLEN];
+
+ DB_MSGBUF_INIT(&mb);
+ mbp = &mb;
+ for (i = 0; i != MUTEX_STATE_MAX; i++) {
+ if (ip->dbth_latches[i].action == MUTEX_ACTION_UNLOCKED)
+ continue;
+ if ((mutex = ip->dbth_latches[i].mutex) ==
+ MUTEX_INVALID)
+ continue;
+ time_buf[4] = '\0';
+#ifdef DIAGNOSTIC
+ if (timespecisset(&ip->dbth_latches[i].when))
+ (void)__db_ctimespec(&ip->dbth_latches[i].when,
+ time_buf);
+ else
+#endif
+ time_buf[0] = '\0';
+
+ __db_msgadd(env, mbp, "%s %s %s ",
+ __mutex_describe(env, mutex, desc),
+ __mutex_action_print(ip->dbth_latches[i].action), time_buf);
+#ifdef HAVE_STATISTICS
+ __mutex_print_debug_stats(env, mbp, mutex, 0);
+#endif
+ DB_MSGBUF_FLUSH(env, mbp);
+ }
+ return (0);
+}
diff --git a/src/mutex/mut_failchk.c b/src/mutex/mut_failchk.c
index 1425389f..28e5d992 100644
--- a/src/mutex/mut_failchk.c
+++ b/src/mutex/mut_failchk.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -9,68 +9,193 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/lock.h"
+
+static int __mutex_failchk_single __P((ENV *, db_mutex_t, DB_THREAD_INFO *));
/*
- * __mut_failchk --
- * Check for mutexes held by dead processes.
+ * __mutex_failchk --
+ * Clean up after dead processes which left behind allocated per-process or
+ * locked mutexes.
*
- * PUBLIC: int __mut_failchk __P((ENV *));
+ * PUBLIC: int __mutex_failchk __P((ENV *));
*/
int
-__mut_failchk(env)
+__mutex_failchk(env)
ENV *env;
{
- DB_ENV *dbenv;
- DB_MUTEX *mutexp;
+ DB_HASHTAB *htab;
DB_MUTEXMGR *mtxmgr;
DB_MUTEXREGION *mtxregion;
- db_mutex_t i;
- int ret;
- char buf[DB_THREADID_STRLEN];
- db_threadid_t unused;
+ DB_THREAD_INFO *ip;
+ db_mutex_t mutex;
+ unsigned i;
+ int count;
- if (F_ISSET(env, ENV_PRIVATE))
+ if (F_ISSET(env, ENV_PRIVATE) || (htab = env->thr_hashtab) == NULL)
return (0);
- DB_THREADID_INIT(unused);
-
- dbenv = env->dbenv;
mtxmgr = env->mutex_handle;
mtxregion = mtxmgr->reginfo.primary;
- ret = 0;
+ count = 0;
+ DB_ASSERT(env, F_ISSET(env->dbenv, DB_ENV_FAILCHK));
MUTEX_SYSTEM_LOCK(env);
- for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i, ++mutexp) {
- mutexp = MUTEXP_SET(env, i);
- /*
- * We're looking for per-process mutexes where the process
- * has died.
- */
- if (!F_ISSET(mutexp, DB_MUTEX_ALLOCATED) ||
- !F_ISSET(mutexp, DB_MUTEX_PROCESS_ONLY))
+ /*
+ * The first loop does each thread's read-locked latches; the second
+ * does all locked mutexes.
+ */
+ for (i = 0; i < env->thr_nbucket; i++)
+ SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+ if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE)
+ continue;
+ count += __mutex_failchk_thread(env, ip);
+ }
+
+ for (mutex = 1; mutex <= mtxregion->stat.st_mutex_cnt; mutex++)
+ if (__mutex_failchk_single(env, mutex, NULL) != 0)
+ count++;
+
+ MUTEX_SYSTEM_UNLOCK(env);
+
+ if (count == 0)
+ return (count);
+ else
+ return (USR_ERR(env, DB_RUNRECOVERY));
+}
+
+/*
+ * __mutex_failchk_thread -
+ * Do the per-latch failchk work on each of this thread's shared latches.
+ *
+ * PUBLIC: int __mutex_failchk_thread __P((ENV *, DB_THREAD_INFO *));
+ */
+int
+__mutex_failchk_thread(env, ip)
+ ENV *env;
+ DB_THREAD_INFO *ip;
+{
+ db_mutex_t mutex;
+ int count, i;
+
+ count = 0;
+ for (i = 0; i != MUTEX_STATE_MAX; i++) {
+ if (ip->dbth_latches[i].action == MUTEX_ACTION_UNLOCKED ||
+ (mutex = ip->dbth_latches[i].mutex) == MUTEX_INVALID)
continue;
+ if (__mutex_failchk_single(env, mutex, ip) != 0)
+ count++;
+ }
+ return (count);
+}
+/*
+ * __mutex_failchk_single --
+ * Determine whether this mutex is locked or shared by a potentially
+ * dead thread. If so, and the call to is_alive() finds that it is dead,
+ * clean up if possible (a process-only mutex); else wake up any waiters.
+ */
+static int
+__mutex_failchk_single(env, mutex, ip)
+ ENV *env;
+ db_mutex_t mutex;
+ DB_THREAD_INFO *ip;
+{
+ DB_ENV *dbenv;
+ DB_MUTEX *mutexp;
+ db_threadid_t threadid;
+ pid_t pid;
+ int already_dead, ret;
+ u_int32_t flags;
+ char id_str[DB_THREADID_STRLEN];
+ char mtx_desc[DB_MUTEX_DESCRIBE_STRLEN];
+
+ dbenv = env->dbenv;
+ mutexp = MUTEXP_SET(env, mutex);
+ flags = mutexp->flags;
+ /*
+ * Filter out mutexes which couldn't possibly be "interesting", in order
+ * to reduce the number of possibly costly is_alive() calls. Check that:
+ * it is allocated
+ * is it either locked, or a shared latch, or a per-process mutex
+ * it is neither a logical lock, nor self-block, nor already dead.
+ * Self-blocking mutexes are skipped because it is expected that they
+ * can still be locked even though they are really 'idle', as with
+ * the wait case in __lock_get_internal(), LOG->free_commits, and
+ * __rep_waiter->mtx_repwait; or they were allocated by the application.
+ */
+ if (!LF_ISSET(DB_MUTEX_ALLOCATED))
+ return (0);
+ if (!LF_ISSET(
+ DB_MUTEX_SHARED | DB_MUTEX_LOCKED | DB_MUTEX_PROCESS_ONLY))
+ return (0);
+ if (LF_ISSET(
+ DB_MUTEX_SELF_BLOCK | DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_OWNER_DEAD))
+ return (0);
+
+ already_dead = ip != NULL && timespecisset(&ip->dbth_failtime);
+ /*
+ * The pid in the mutex is valid for locked or per-process mutexes.
+ * The tid is correct only when exclusively locked. It's okay to look at
+ * the tid of an unlocked per-process mutex, we won't use it in the
+ * is_alive() call.
+ */
+ if (LF_ISSET(DB_MUTEX_LOCKED | DB_MUTEX_PROCESS_ONLY)) {
+ pid = mutexp->pid;
+ threadid = mutexp->tid;
+ } else {
+ DB_ASSERT(env, LF_ISSET(DB_MUTEX_SHARED));
/*
- * The thread that allocated the mutex may have exited, but
- * we cannot reclaim the mutex if the process is still alive.
+ * If we get here with no thread, then this is a shared latch
+ * which is neither locked nor shared, we're done with it.
*/
- if (dbenv->is_alive(
- dbenv, mutexp->pid, unused, DB_MUTEX_PROCESS_ONLY))
- continue;
+ if (ip == NULL)
+ return (0);
+ pid = ip->dbth_pid;
+ threadid = ip->dbth_tid;
+ }
+ if (!already_dead && dbenv->is_alive(dbenv,
+ pid, threadid, LF_ISSET(DB_MUTEX_PROCESS_ONLY)))
+ return (0);
+
+ /* The thread is dead; the mutex type indicates the kind of cleanup. */
+ (void)dbenv->thread_id_string(dbenv, pid, threadid, id_str);
+ (void)__mutex_describe(env, mutex, mtx_desc);
- __db_msg(env, DB_STR_A("2017",
- "Freeing mutex for process: %s", "%s"),
- dbenv->thread_id_string(dbenv, mutexp->pid, unused, buf));
+ if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
+ if (already_dead)
+ return (0);
+
+ __db_errx(env, DB_STR_A("2065",
+ "Freeing %s for process: %s", "%s %s"), mtx_desc, id_str);
+
+ /* Clear the mutex id if it is in a cached locker. */
+ if ((ret = __lock_local_locker_invalidate(env, mutex)) != 0)
+ return (ret);
/* Unlock and free the mutex. */
- if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
- MUTEX_UNLOCK(env, i);
+ if (LF_ISSET(DB_MUTEX_LOCKED))
+ MUTEX_UNLOCK(env, mutex);
- if ((ret = __mutex_free_int(env, 0, &i)) != 0)
- break;
+ return (__mutex_free_int(env, 0, &mutex));
}
- MUTEX_SYSTEM_UNLOCK(env);
-
- return (ret);
+#ifdef HAVE_FAILCHK_BROADCAST
+ else if (LF_ISSET(DB_MUTEX_LOCKED)) {
+ __db_errx(env, DB_STR_A("2066",
+ "Marking %s as owned by dead thread %s", "%lu %s"),
+ mtx_desc, id_str);
+ F_SET(mutexp, DB_MUTEX_OWNER_DEAD);
+ } else if (LF_ISSET(DB_MUTEX_SHARED)) {
+ __db_errx(env, DB_STR_A("2067",
+ "Marking %s as shared by dead thread %s", "%lu %s"),
+ mtx_desc, id_str);
+ F_SET(mutexp, DB_MUTEX_OWNER_DEAD);
+ } else {
+ __db_errx(env, DB_STR_A("2068",
+ "mutex_failchk: unknown state for %s with dead thread %s", "%lu %s"),
+ mtx_desc, id_str);
+ }
+#endif
+ return (USR_ERR(env, DB_RUNRECOVERY));
}
diff --git a/src/mutex/mut_fcntl.c b/src/mutex/mut_fcntl.c
deleted file mode 100644
index 0694aa59..00000000
--- a/src/mutex/mut_fcntl.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-
-static inline int __db_fcntl_mutex_lock_int
- __P((ENV *, db_mutex_t, db_timeout_t, int));
-
-/*
- * __db_fcntl_mutex_init --
- * Initialize a fcntl mutex.
- *
- * PUBLIC: int __db_fcntl_mutex_init __P((ENV *, db_mutex_t, u_int32_t));
- */
-int
-__db_fcntl_mutex_init(env, mutex, flags)
- ENV *env;
- db_mutex_t mutex;
- u_int32_t flags;
-{
- COMPQUIET(env, NULL);
- COMPQUIET(mutex, MUTEX_INVALID);
- COMPQUIET(flags, 0);
-
- return (0);
-}
-
-/*
- * __db_fcntl_mutex_lock_int
- * Internal function to lock a mutex, blocking only when requested
- */
-inline int
-__db_fcntl_mutex_lock_int(env, mutex, timeout, wait)
- ENV *env;
- db_mutex_t mutex;
- db_timeout_t timeout;
- int wait;
-{
- DB_ENV *dbenv;
- DB_MUTEX *mutexp;
- DB_THREAD_INFO *ip;
- struct flock k_lock;
- int locked, ms, ret;
- db_timespec now, timespec;
- db_timeout_t time_left;
-
- dbenv = env->dbenv;
-
- if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
- return (0);
-
- mutexp = MUTEXP_SET(env, mutex);
-
- CHECK_MTX_THREAD(env, mutexp);
-
-#ifdef HAVE_STATISTICS
- if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
- ++mutexp->mutex_set_wait;
- else
- ++mutexp->mutex_set_nowait;
-#endif
-
- /* Initialize the lock. */
- k_lock.l_whence = SEEK_SET;
- k_lock.l_start = mutex;
- k_lock.l_len = 1;
-
- if (timeout != 0) {
- timespecclear(&timespec);
- __clock_set_expires(env, &timespec, timeout);
- }
-
- /*
- * Only check the thread state once, by initializing the thread
- * control block pointer to null. If it is not the failchk
- * thread, then ip will have a valid value subsequent times
- * in the loop.
- */
- ip = NULL;
-
- for (locked = 0;;) {
- /*
- * Wait for the lock to become available; wait 1ms initially,
- * up to 1 second.
- */
- for (ms = 1; F_ISSET(mutexp, DB_MUTEX_LOCKED);) {
- if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
- ip == NULL && dbenv->is_alive(dbenv,
- mutexp->pid, mutexp->tid, 0) == 0) {
- ret = __env_set_state(env, &ip, THREAD_VERIFY);
- if (ret != 0 ||
- ip->dbth_state == THREAD_FAILCHK)
- return (DB_RUNRECOVERY);
- }
- if (!wait)
- return (DB_LOCK_NOTGRANTED);
- if (timeout != 0) {
- timespecclear(&now);
- if (__clock_expired(env, &now, &timespec))
- return (DB_TIMEOUT);
- DB_TIMESPEC_TO_TIMEOUT(time_left, &now, 0);
- time_left = timeout - time_left;
- if (ms * US_PER_MS > time_left)
- ms = time_left / US_PER_MS;
- }
- __os_yield(NULL, 0, ms * US_PER_MS);
- if ((ms <<= 1) > MS_PER_SEC)
- ms = MS_PER_SEC;
- }
-
- /* Acquire an exclusive kernel lock on the byte. */
- k_lock.l_type = F_WRLCK;
- if (fcntl(env->lockfhp->fd, F_SETLKW, &k_lock))
- goto err;
-
- /* If the resource is still available, it's ours. */
- if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
- locked = 1;
-
- F_SET(mutexp, DB_MUTEX_LOCKED);
- dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
- }
-
- /* Release the kernel lock. */
- k_lock.l_type = F_UNLCK;
- if (fcntl(env->lockfhp->fd, F_SETLK, &k_lock))
- goto err;
-
- /*
- * If we got the resource lock we're done.
- *
- * !!!
- * We can't check to see if the lock is ours, because we may
- * be trying to block ourselves in the lock manager, and so
- * the holder of the lock that's preventing us from getting
- * the lock may be us! (Seriously.)
- */
- if (locked)
- break;
- }
-
-#ifdef DIAGNOSTIC
- /*
- * We want to switch threads as often as possible. Yield every time
- * we get a mutex to ensure contention.
- */
- if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
- __os_yield(env, 0, 0);
-#endif
- return (0);
-
-err: ret = __os_get_syserr();
- __db_syserr(env, ret, DB_STR("2019", "fcntl lock failed"));
- return (__env_panic(env, __os_posix_err(ret)));
-}
-
-/*
- * __db_fcntl_mutex_lock
- * Lock a mutex, blocking if necessary.
- *
- * PUBLIC: int __db_fcntl_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
- */
-int
-__db_fcntl_mutex_lock(env, mutex, timeout)
- ENV *env;
- db_mutex_t mutex;
- db_timeout_t timeout;
-{
- return (__db_fcntl_mutex_lock_int(env, mutex, timeout, 1));
-}
-
-/*
- * __db_fcntl_mutex_trylock
- * Try to lock a mutex, without blocking when it is busy.
- *
- * PUBLIC: int __db_fcntl_mutex_trylock __P((ENV *, db_mutex_t));
- */
-int
-__db_fcntl_mutex_trylock(env, mutex)
- ENV *env;
- db_mutex_t mutex;
-{
- return (__db_fcntl_mutex_lock_int(env, mutex, 0, 0));
-}
-
-/*
- * __db_fcntl_mutex_unlock --
- * Release a mutex.
- *
- * PUBLIC: int __db_fcntl_mutex_unlock __P((ENV *, db_mutex_t));
- */
-int
-__db_fcntl_mutex_unlock(env, mutex)
- ENV *env;
- db_mutex_t mutex;
-{
- DB_ENV *dbenv;
- DB_MUTEX *mutexp;
-
- dbenv = env->dbenv;
-
- if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
- return (0);
-
- mutexp = MUTEXP_SET(env, mutex);
-
-#ifdef DIAGNOSTIC
- if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
- __db_errx(env, DB_STR("2020",
- "fcntl unlock failed: lock already unlocked"));
- return (__env_panic(env, EACCES));
- }
-#endif
-
- /*
- * Release the resource. We don't have to acquire any locks because
- * processes trying to acquire the lock are waiting for the flag to
- * go to 0. Once that happens the waiters will serialize acquiring
- * an exclusive kernel lock before locking the mutex.
- */
- F_CLR(mutexp, DB_MUTEX_LOCKED);
-
- return (0);
-}
-
-/*
- * __db_fcntl_mutex_destroy --
- * Destroy a mutex.
- *
- * PUBLIC: int __db_fcntl_mutex_destroy __P((ENV *, db_mutex_t));
- */
-int
-__db_fcntl_mutex_destroy(env, mutex)
- ENV *env;
- db_mutex_t mutex;
-{
- COMPQUIET(env, NULL);
- COMPQUIET(mutex, MUTEX_INVALID);
-
- return (0);
-}
diff --git a/src/mutex/mut_method.c b/src/mutex/mut_method.c
index cb666082..99bafeae 100644
--- a/src/mutex/mut_method.c
+++ b/src/mutex/mut_method.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -371,6 +371,33 @@ __mutex_set_tas_spins(dbenv, tas_spins)
return (0);
}
+#ifdef HAVE_ERROR_HISTORY
+/*
+ * __mutex_diags --
+ *
+ * PUBLIC: #ifdef HAVE_ERROR_HISTORY
+ * PUBLIC: int __mutex_diags __P((ENV *, db_mutex_t, int));
+ * PUBLIC: #endif
+ */
+int
+__mutex_diags(env, mutex, error)
+ ENV *env;
+ db_mutex_t mutex;
+ int error;
+{
+ DB_MSGBUF *mb;
+
+ if ((mb = __db_deferred_get()) != NULL) {
+ (void)__db_remember_context(env, mb, error);
+ __db_msgadd(env, mb, "Mutex %u ", (unsigned int)mutex);
+#ifdef HAVE_STATISTICS
+ __mutex_print_debug_stats(env, mb, mutex, 0);
+#endif
+ }
+ return (error);
+}
+#endif
+
#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT)
/*
* Provide atomic operations for platforms which have mutexes yet do not have
diff --git a/src/mutex/mut_pthread.c b/src/mutex/mut_pthread.c
index 1ec4fb9c..4b2cfb81 100644
--- a/src/mutex/mut_pthread.c
+++ b/src/mutex/mut_pthread.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -64,6 +64,19 @@
} while (0)
/*
+ * !!!
+ * Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME -- out
+ * of sheer paranoia, check both ETIME and ETIMEDOUT. We believe this happens
+ * when the application uses SIGALRM for some purpose, e.g., the C library sleep
+ * call, and Solaris delivers the signal to the wrong LWP.
+ */
+#ifdef ETIME
+#define ETIME_TO_ETIMEDOUT(ret) ((ret) == ETIME ? ETIMEDOUT : (ret))
+#else
+#define ETIME_TO_ETIMEDOUT(ret) (ret)
+#endif
+
+/*
* __db_pthread_mutex_init --
* Initialize a pthread mutex: either a native one or
* just the mutex for block/wakeup of a hybrid test-and-set mutex
@@ -104,18 +117,18 @@ __db_pthread_mutex_init(env, mutex, flags)
pthread_rwlockattr_t rwlockattr, *rwlockattrp = NULL;
#ifndef HAVE_MUTEX_THREAD_ONLY
if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
- RET_SET((pthread_rwlockattr_init(&rwlockattr)), ret);
+ RET_SET(pthread_rwlockattr_init(&rwlockattr), ret);
if (ret != 0)
goto err;
- RET_SET((pthread_rwlockattr_setpshared(
- &rwlockattr, PTHREAD_PROCESS_SHARED)), ret);
+ RET_SET(pthread_rwlockattr_setpshared(
+ &rwlockattr, PTHREAD_PROCESS_SHARED), ret);
rwlockattrp = &rwlockattr;
}
#endif
if (ret == 0)
- RET_SET((pthread_rwlock_init(&mutexp->u.rwlock,
- rwlockattrp)), ret);
+ RET_SET(pthread_rwlock_init(&mutexp->u.rwlock,
+ rwlockattrp), ret);
if (rwlockattrp != NULL)
(void)pthread_rwlockattr_destroy(rwlockattrp);
@@ -127,18 +140,18 @@ __db_pthread_mutex_init(env, mutex, flags)
#endif
#ifndef HAVE_MUTEX_THREAD_ONLY
if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
- RET_SET((pthread_mutexattr_init(&mutexattr)), ret);
+ RET_SET(pthread_mutexattr_init(&mutexattr), ret);
if (ret != 0)
goto err;
- RET_SET((pthread_mutexattr_setpshared(
- &mutexattr, PTHREAD_PROCESS_SHARED)), ret);
+ RET_SET(pthread_mutexattr_setpshared(
+ &mutexattr, PTHREAD_PROCESS_SHARED), ret);
mutexattrp = &mutexattr;
}
#endif
if (ret == 0)
RET_SET(
- (pthread_mutex_init(&mutexp->u.m.mutex, mutexattrp)), ret);
+ pthread_mutex_init(&mutexp->u.m.mutex, mutexattrp), ret);
if (mutexattrp != NULL)
(void)pthread_mutexattr_destroy(mutexattrp);
@@ -147,19 +160,19 @@ __db_pthread_mutex_init(env, mutex, flags)
if (LF_ISSET(DB_MUTEX_SELF_BLOCK)) {
#ifndef HAVE_MUTEX_THREAD_ONLY
if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
- RET_SET((pthread_condattr_init(&condattr)), ret);
+ RET_SET(pthread_condattr_init(&condattr), ret);
if (ret != 0)
goto err;
condattrp = &condattr;
- RET_SET((pthread_condattr_setpshared(
- &condattr, PTHREAD_PROCESS_SHARED)), ret);
+ RET_SET(pthread_condattr_setpshared(
+ &condattr, PTHREAD_PROCESS_SHARED), ret);
}
#endif
if (ret == 0)
- RET_SET((pthread_cond_init(
- &mutexp->u.m.cond, condattrp)), ret);
+ RET_SET(pthread_cond_init(
+ &mutexp->u.m.cond, condattrp), ret);
F_SET(mutexp, DB_MUTEX_SELF_BLOCK);
if (condattrp != NULL)
@@ -239,6 +252,9 @@ __db_pthread_mutex_prep(env, mutex, mutexp, exclusive)
{
DB_ENV *dbenv;
DB_THREAD_INFO *ip;
+#ifdef HAVE_FAILCHK_BROADCAST
+ db_timespec timespec;
+#endif
int ret;
dbenv = env->dbenv;
@@ -266,13 +282,32 @@ __db_pthread_mutex_prep(env, mutex, mutexp, exclusive)
* hadn't gone down the 'if
* DB_ENV_FAILCHK' path to start with.
*/
- RET_SET_PTHREAD_LOCK(mutexp, ret);
- break;
+ goto lockit;
}
+ __os_yield(env, 0, 10);
}
}
- } else
- RET_SET_PTHREAD_LOCK(mutexp, ret);
+ } else {
+lockit:
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (dbenv->mutex_failchk_timeout != 0) {
+ timespecclear(&timespec);
+ __clock_set_expires(env,
+ &timespec, dbenv->mutex_failchk_timeout);
+ do {
+ RET_SET_PTHREAD_TIMEDLOCK(mutexp,
+ (struct timespec *)&timespec, ret);
+ ret = ETIME_TO_ETIMEDOUT(ret);
+ if (ret == ETIMEDOUT &&
+ F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK))
+ ret = USR_ERR(env,
+ __mutex_died(env, mutex));
+ } while (ret == ETIMEDOUT);
+ } else
+#endif
+ RET_SET_PTHREAD_LOCK(mutexp, ret);
+ }
PERFMON4(env,
mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp);
@@ -302,49 +337,75 @@ __db_pthread_mutex_condwait(env, mutex, mutexp, timespec)
DB_MUTEX *mutexp;
db_timespec *timespec;
{
+ DB_ENV *dbenv;
int ret;
-
-#ifdef MUTEX_DIAG
- printf("condwait %ld %x wait busy %x count %d\n",
- mutex, pthread_self(), MUTEXP_BUSY_FIELD(mutexp), mutexp->wait);
+#ifdef HAVE_FAILCHK_BROADCAST
+ db_timespec failchk_timespec;
#endif
+
+ dbenv = env->dbenv;
PERFMON4(env, mutex, suspend, mutex, TRUE, mutexp->alloc_id, mutexp);
+#ifdef HAVE_FAILCHK_BROADCAST
+ /*
+ * If the failchk timeout would be sooner than the timeout passed in as an
+ * argument, use the failchk timeout. The caller handles "short" waits.
+ */
+ if (dbenv->mutex_failchk_timeout != 0) {
+ timespecclear(&failchk_timespec);
+ __clock_set_expires(env,
+ &failchk_timespec, dbenv->mutex_failchk_timeout);
+ if (timespec == NULL ||
+ timespeccmp(timespec, &failchk_timespec, >))
+ timespec = &failchk_timespec;
+ }
+#endif
+
if (timespec != NULL) {
- RET_SET((pthread_cond_timedwait(&mutexp->u.m.cond,
- &mutexp->u.m.mutex, (struct timespec *) timespec)), ret);
+ RET_SET(pthread_cond_timedwait(&mutexp->u.m.cond,
+ &mutexp->u.m.mutex, (struct timespec *) timespec), ret);
+ ret = ETIME_TO_ETIMEDOUT(ret);
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ ret = USR_ERR(env, __mutex_died(env, mutex));
+ goto err;
+ }
+#endif
if (ret == ETIMEDOUT) {
ret = DB_TIMEOUT;
- goto ret;
+ goto err;
}
} else
- RET_SET((pthread_cond_wait(&mutexp->u.m.cond,
- &mutexp->u.m.mutex)), ret);
-#ifdef MUTEX_DIAG
- printf("condwait %ld %x wait returns %d busy %x\n",
- mutex, pthread_self(), ret, MUTEXP_BUSY_FIELD(mutexp));
+ RET_SET(pthread_cond_wait(&mutexp->u.m.cond,
+ &mutexp->u.m.mutex), ret);
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (ret == 0 && F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ ret = USR_ERR(env, __mutex_died(env, mutex));
+ goto err;
+ }
#endif
/*
* !!!
* Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME
- * -- out of sheer paranoia, check both ETIME and ETIMEDOUT. We
+ * -- out of sheer paranoia, check both ETIME and ETIMEDOUT. We
* believe this happens when the application uses SIGALRM for some
* purpose, e.g., the C library sleep call, and Solaris delivers the
- * signal to the wrong LWP.
+ * signal to the wrong LWP.
*/
if (ret != 0) {
- if (ret == ETIMEDOUT ||
-#ifdef ETIME
- ret == ETIME ||
-#endif
+ if ((ret = ETIME_TO_ETIMEDOUT(ret)) == ETIMEDOUT ||
ret == EINTR)
ret = 0;
- else
+ else {
/* Failure, caller shouldn't condwait again. */
(void)pthread_mutex_unlock(&mutexp->u.m.mutex);
+ (void)MUTEX_ERR(env, mutex, ret);
+ }
}
-ret:
+err:
PERFMON4(env, mutex, resume, mutex, TRUE, mutexp->alloc_id, mutexp);
COMPQUIET(mutex, 0);
@@ -356,7 +417,10 @@ ret:
/*
* __db_pthread_mutex_lock
* Lock on a mutex, blocking if necessary.
- * Timeouts are supported only for self-blocking mutexes.
+ * Timeouts are supported only for self-blocking mutexes. When both a
+ * given timeout and a dbenv-wide failchk timeout are specified, the
+ * given timeout takes precedence -- a process failure might not be noticed
+ * for a little while.
*
* Self-blocking shared latches are not supported.
*
@@ -372,6 +436,7 @@ __db_pthread_mutex_lock(env, mutex, timeout)
{
DB_ENV *dbenv;
DB_MUTEX *mutexp;
+ db_timeout_t checktimeout;
db_timespec timespec;
int ret, t_ret;
@@ -385,7 +450,6 @@ __db_pthread_mutex_lock(env, mutex, timeout)
CHECK_MTX_THREAD(env, mutexp);
-#if defined(HAVE_STATISTICS)
/*
* We want to know which mutexes are contentious, but don't want to
* do an interlocked test here -- that's slower when the underlying
@@ -398,6 +462,11 @@ __db_pthread_mutex_lock(env, mutex, timeout)
else
STAT_INC(env,
mutex, set_nowait, mutexp->mutex_set_nowait, mutex);
+
+ checktimeout = timeout;
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (checktimeout == 0 || checktimeout > dbenv->mutex_failchk_timeout)
+ checktimeout = dbenv->mutex_failchk_timeout;
#endif
/* Single-thread the next block, except during the possible condwait. */
@@ -405,14 +474,12 @@ __db_pthread_mutex_lock(env, mutex, timeout)
goto err;
if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) {
- if (timeout != 0)
+ if (checktimeout != 0)
timespecclear(&timespec);
while (MUTEXP_IS_BUSY(mutexp)) {
/* Set expiration timer upon first need. */
- if (timeout != 0 && !timespecisset(&timespec)) {
- timespecclear(&timespec);
+ if (checktimeout != 0 && !timespecisset(&timespec))
__clock_set_expires(env, &timespec, timeout);
- }
t_ret = __db_pthread_mutex_condwait(env,
mutex, mutexp, timeout == 0 ? NULL : &timespec);
if (t_ret != 0) {
@@ -428,18 +495,20 @@ __db_pthread_mutex_lock(env, mutex, timeout)
out:
/* #2471: HP-UX can sporadically return EFAULT. See above */
RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret);
- if (ret != 0)
+ if (ret != 0) {
+ (void)MUTEX_ERR(env, mutex, ret);
goto err;
+ }
} else {
#ifdef DIAGNOSTIC
if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
char buf[DB_THREADID_STRLEN];
(void)dbenv->thread_id_string(dbenv,
mutexp->pid, mutexp->tid, buf);
+ ret = MUTEX_ERR(env, mutex, EINVAL);
__db_errx(env, DB_STR_A("2022",
"pthread lock failed: lock currently in use: pid/tid: %s",
"%s"), buf);
- ret = EINVAL;
goto err;
}
#endif
@@ -455,6 +524,13 @@ out:
if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
__os_yield(env, 0, 0);
#endif
+#ifdef MUTEX_DIAG
+ if (t_ret == 0) {
+ __os_gettime(env, &mutexp->mutex_history.when, 0);
+ __os_stack_text(env, mutexp->mutex_history.stacktext,
+ sizeof(mutexp->mutex_history.stacktext), 12, 2);
+ }
+#endif
return (t_ret);
err:
@@ -479,6 +555,10 @@ __db_pthread_mutex_readlock(env, mutex)
{
DB_ENV *dbenv;
DB_MUTEX *mutexp;
+ MUTEX_STATE *state;
+#ifdef HAVE_FAILCHK_BROADCAST
+ db_timespec timespec;
+#endif
int ret;
dbenv = env->dbenv;
@@ -491,7 +571,6 @@ __db_pthread_mutex_readlock(env, mutex)
CHECK_MTX_THREAD(env, mutexp);
-#if defined(HAVE_STATISTICS)
/*
* We want to know which mutexes are contentious, but don't want to
* do an interlocked test here -- that's slower when the underlying
@@ -505,15 +584,52 @@ __db_pthread_mutex_readlock(env, mutex)
else
STAT_INC(env,
mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex);
-#endif
+
+ state = NULL;
+ if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
+ mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
+ return (ret);
PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);
- RET_SET((pthread_rwlock_rdlock(&mutexp->u.rwlock)), ret);
+
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (dbenv->mutex_failchk_timeout != 0) {
+ do {
+ timespecclear(&timespec);
+ __clock_set_expires(env,
+ &timespec, dbenv->mutex_failchk_timeout);
+ RET_SET(pthread_rwlock_timedrdlock(&mutexp->u.rwlock,
+ (struct timespec *)&timespec), ret);
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ if (ret == 0)
+ RETRY_ON_EFAULT(pthread_rwlock_unlock(
+ &mutexp->u.rwlock), ret);
+ ret = USR_ERR(env, __mutex_died(env, mutex));
+ goto err;
+ }
+ } while (ret == DB_TIMEOUT);
+ } else
+#endif
+ RET_SET(pthread_rwlock_rdlock(&mutexp->u.rwlock), ret);
+
PERFMON4(env, mutex, resume, mutex, FALSE, mutexp->alloc_id, mutexp);
DB_ASSERT(env, !F_ISSET(mutexp, DB_MUTEX_LOCKED));
if (ret != 0)
goto err;
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ ret = USR_ERR(env, __mutex_died(env, mutex));
+ goto err;
+ }
+#endif
+#ifdef MUTEX_DIAG
+ __os_gettime(env, &mutexp->mutex_history.when, 0);
+ __os_stack_text(env, mutexp->mutex_history.stacktext,
+ sizeof(mutexp->mutex_history.stacktext), 12, 2);
+#endif
#ifdef DIAGNOSTIC
/*
* We want to switch threads as often as possible. Yield every time
@@ -524,7 +640,10 @@ __db_pthread_mutex_readlock(env, mutex)
#endif
return (0);
-err: __db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
+err:
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
+ __db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
return (__env_panic(env, ret));
}
#endif
@@ -532,8 +651,10 @@ err: __db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
#ifdef HAVE_MUTEX_HYBRID
/*
* __db_hybrid_mutex_suspend
- * Suspend this thread until the mutex is free enough to give the caller a
- * good chance of getting the mutex in the requested exclusivity mode.
+ * Suspend this thread, usually until the mutex is free enough to give the
+ * caller a good chance of getting the mutex in the requested exclusivity
+ * mode. Return early if the timeout is reached or a dead mutex is found
+ * to be dead.
*
* The major difference between this and the old __db_pthread_mutex_lock()
* is the additional 'exclusive' parameter.
@@ -551,6 +672,9 @@ __db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
int exclusive;
{
DB_MUTEX *mutexp;
+#ifdef HAVE_FAILCHECK_BROADCAST
+ db_timespec failchk_timespec;
+#endif
int ret, t_ret;
t_ret = 0;
@@ -571,7 +695,7 @@ __db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
* before checking the wait counter.
*/
mutexp->wait++;
- MUTEX_MEMBAR(mutexp->wait);
+ (void)MUTEX_MEMBAR(mutexp->wait);
while (exclusive ? MUTEXP_IS_BUSY(mutexp) :
atomic_read(&mutexp->sharecount) == MUTEX_SHARE_ISEXCLUSIVE) {
t_ret = __db_pthread_mutex_condwait(env,
@@ -582,7 +706,7 @@ __db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
ret = t_ret;
goto err;
}
- MUTEX_MEMBAR(mutexp->flags);
+ (void)MUTEX_MEMBAR(mutexp->flags);
}
mutexp->wait--;
@@ -627,8 +751,8 @@ __db_pthread_mutex_unlock(env, mutex)
DB_ENV *dbenv;
DB_MUTEX *mutexp;
int ret;
-#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID)
- int waiters;
+#ifndef HAVE_MUTEX_HYBRID
+ char description[DB_MUTEX_DESCRIBE_STRLEN];
#endif
dbenv = env->dbenv;
@@ -637,14 +761,13 @@ __db_pthread_mutex_unlock(env, mutex)
return (0);
mutexp = MUTEXP_SET(env, mutex);
-#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID)
- waiters = mutexp->wait;
-#endif
-#if !defined(HAVE_MUTEX_HYBRID) && defined(DIAGNOSTIC)
+#if !defined(HAVE_MUTEX_HYBRID)
if (!F_ISSET(mutexp, DB_MUTEX_LOCKED | DB_MUTEX_SHARED)) {
- __db_errx(env, DB_STR("2025",
- "pthread unlock failed: lock already unlocked"));
+ if (!PANIC_ISSET(env))
+ __db_errx(env, DB_STR("2069",
+ "pthread unlock %s: already unlocked"),
+ __mutex_describe(env, mutex, description));
return (__env_panic(env, EACCES));
}
#endif
@@ -662,14 +785,19 @@ __db_pthread_mutex_unlock(env, mutex)
if (F_ISSET(mutexp, DB_MUTEX_SHARED))
RET_SET(
- (pthread_cond_broadcast(&mutexp->u.m.cond)), ret);
+ pthread_cond_broadcast(&mutexp->u.m.cond), ret);
else
- RET_SET((pthread_cond_signal(&mutexp->u.m.cond)), ret);
+ RET_SET(pthread_cond_signal(&mutexp->u.m.cond), ret);
if (ret != 0)
goto err;
} else {
#ifndef HAVE_MUTEX_HYBRID
- F_CLR(mutexp, DB_MUTEX_LOCKED);
+
+ if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
+ F_CLR(mutexp, DB_MUTEX_LOCKED);
+ else if (env->thr_hashtab != NULL &&
+ (ret = __mutex_record_unlock(env, mutex)) != 0)
+ goto err;
#endif
}
@@ -685,12 +813,6 @@ err: if (ret != 0) {
__db_err(env, ret, "pthread unlock failed");
return (__env_panic(env, ret));
}
-#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID)
- if (!MUTEXP_IS_BUSY(mutexp) && mutexp->wait != 0)
- printf("unlock %ld %x busy %x waiters %d/%d\n",
- mutex, pthread_self(), ret,
- MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait);
-#endif
return (ret);
}
@@ -739,7 +861,7 @@ __db_pthread_mutex_destroy(env, mutex)
if (!failchk_thread)
#endif
RET_SET(
- (pthread_rwlock_destroy(&mutexp->u.rwlock)), ret);
+ pthread_rwlock_destroy(&mutexp->u.rwlock), ret);
/* For rwlocks, we're done - must not destroy rest of union */
return (ret);
#endif
@@ -754,15 +876,14 @@ __db_pthread_mutex_destroy(env, mutex)
#ifdef HAVE_PTHREAD_COND_REINIT_OKAY
if (!failchk_thread)
#endif
- RET_SET((pthread_cond_destroy(&mutexp->u.m.cond)), ret);
+ RET_SET(pthread_cond_destroy(&mutexp->u.m.cond), ret);
if (ret != 0)
__db_err(env, ret, DB_STR("2026",
"unable to destroy cond"));
}
- RET_SET((pthread_mutex_destroy(&mutexp->u.m.mutex)), t_ret);
+ RET_SET(pthread_mutex_destroy(&mutexp->u.m.mutex), t_ret);
if (t_ret != 0 && !failchk_thread) {
- __db_err(env, t_ret, DB_STR("2027",
- "unable to destroy mutex"));
+ __db_err(env, t_ret, DB_STR("2027", "unable to destroy mutex"));
if (ret == 0)
ret = t_ret;
}
diff --git a/src/mutex/mut_region.c b/src/mutex/mut_region.c
index 26ae0a03..976ff231 100644
--- a/src/mutex/mut_region.c
+++ b/src/mutex/mut_region.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -34,7 +34,7 @@ __mutex_open(env, create_ok)
DB_MUTEXMGR *mtxmgr;
DB_MUTEXREGION *mtxregion;
size_t size;
- u_int32_t cpu_count;
+ u_int32_t cpu_count, tas_spins;
int ret;
#ifndef HAVE_ATOMIC_SUPPORT
u_int i;
@@ -55,8 +55,14 @@ __mutex_open(env, create_ok)
dbenv->mutex_align = MUTEX_ALIGN;
if (dbenv->mutex_tas_spins == 0) {
cpu_count = __os_cpu_count();
- if ((ret = __mutex_set_tas_spins(dbenv, cpu_count == 1 ?
- cpu_count : cpu_count * MUTEX_SPINS_PER_PROCESSOR)) != 0)
+ if (cpu_count == 1)
+ tas_spins = 1;
+ else {
+ tas_spins = cpu_count * MUTEX_SPINS_PER_PROCESSOR;
+ if (tas_spins > MUTEX_SPINS_DEFAULT_MAX)
+ tas_spins = MUTEX_SPINS_DEFAULT_MAX;
+ }
+ if ((ret = __mutex_set_tas_spins(dbenv, tas_spins)) != 0)
return (ret);
}
@@ -118,11 +124,29 @@ __mutex_open(env, create_ok)
return (0);
-err: env->mutex_handle = NULL;
- if (mtxmgr->reginfo.addr != NULL)
- (void)__env_region_detach(env, &mtxmgr->reginfo, 0);
+err: (void)__mutex_region_detach(env, mtxmgr);
+ return (ret);
+}
- __os_free(env, mtxmgr);
+/*
+ * __mutex_region_detach --
+ *
+ * PUBLIC: int __mutex_region_detach __P((ENV *, DB_MUTEXMGR *));
+ */
+int
+__mutex_region_detach(env, mtxmgr)
+ ENV *env;
+ DB_MUTEXMGR *mtxmgr;
+{
+ int ret;
+
+ ret = 0;
+ if (mtxmgr != NULL) {
+ if (mtxmgr->reginfo.addr != NULL)
+ ret = __env_region_detach(env, &mtxmgr->reginfo, 0);
+ __os_free(env, mtxmgr);
+ env->mutex_handle = NULL;
+ }
return (ret);
}
@@ -136,7 +160,6 @@ __mutex_region_init(env, mtxmgr)
DB_MUTEXMGR *mtxmgr;
{
DB_ENV *dbenv;
- DB_MUTEX *mutexp;
DB_MUTEXREGION *mtxregion;
db_mutex_t mutex;
int ret;
@@ -144,8 +167,6 @@ __mutex_region_init(env, mtxmgr)
dbenv = env->dbenv;
- COMPQUIET(mutexp, NULL);
-
if ((ret = __env_alloc(&mtxmgr->reginfo,
sizeof(DB_MUTEXREGION), &mtxmgr->reginfo.primary)) != 0) {
__db_errx(env, DB_STR("2013",
@@ -205,26 +226,11 @@ __mutex_region_init(env, mtxmgr)
* in each link.
*/
env->mutex_handle = mtxmgr;
- if (F_ISSET(env, ENV_PRIVATE)) {
- mutexp = (DB_MUTEX *)mutex_array;
- mutexp++;
- mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
- mtxregion->mutex_next = (db_mutex_t)mutexp;
- } else {
- mtxregion->mutex_next = 1;
- mutexp = MUTEXP_SET(env, 1);
- }
- for (mutex = 1; mutex < mtxregion->stat.st_mutex_cnt; ++mutex) {
- mutexp->flags = 0;
- if (F_ISSET(env, ENV_PRIVATE))
- mutexp->mutex_next_link = (db_mutex_t)(mutexp + 1);
- else
- mutexp->mutex_next_link = mutex + 1;
- mutexp++;
- mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
- }
- mutexp->flags = 0;
- mutexp->mutex_next_link = MUTEX_INVALID;
+ mtxregion->mutex_next = (F_ISSET(env, ENV_PRIVATE) ?
+ ((uintptr_t)mutex_array + mtxregion->mutex_size) : 1);
+ MUTEX_BULK_INIT(env,
+ mtxregion, mtxregion->mutex_next, mtxregion->stat.st_mutex_cnt);
+
mtxregion->stat.st_mutex_free = mtxregion->stat.st_mutex_cnt;
mtxregion->stat.st_mutex_inuse = mtxregion->stat.st_mutex_inuse_max = 0;
if ((ret = __mutex_alloc(env, MTX_MUTEX_REGION, 0, &mutex)) != 0)
diff --git a/src/mutex/mut_stat.c b/src/mutex/mut_stat.c
index b64207fa..af622c7d 100644
--- a/src/mutex/mut_stat.c
+++ b/src/mutex/mut_stat.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -19,6 +19,17 @@ static int __mutex_print_stats __P((ENV *, u_int32_t));
static void __mutex_print_summary __P((ENV *));
static int __mutex_stat __P((ENV *, DB_MUTEX_STAT **, u_int32_t));
+static const FN MutexFlagNames[] = {
+ { DB_MUTEX_ALLOCATED, "alloc" },
+ { DB_MUTEX_LOCKED, "locked" },
+ { DB_MUTEX_LOGICAL_LOCK, "logical" },
+ { DB_MUTEX_OWNER_DEAD, "ower-dead" },
+ { DB_MUTEX_PROCESS_ONLY, "process-private" },
+ { DB_MUTEX_SELF_BLOCK, "self-block" },
+ { DB_MUTEX_SHARED, "shared" },
+ { 0, NULL }
+};
+
/*
* __mutex_stat_pp --
* ENV->mutex_stat pre/post processing.
@@ -170,11 +181,12 @@ __mutex_print_summary(env)
size = 0;
if (F_ISSET(env, ENV_PRIVATE)) {
- mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1;
+ mutexp = (DB_MUTEX *)((uintptr_t)mtxmgr->mutex_array +
+ mtxregion->mutex_size);
chunk = NULL;
size = __env_elem_size(env,
ROFF_TO_P(mtxregion->mutex_off_alloc));
- size -= sizeof(*mutexp);
+ size -= mtxregion->mutex_size;
} else
mutexp = MUTEXP_SET(env, 1);
for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) {
@@ -185,13 +197,15 @@ __mutex_print_summary(env)
else
counts[mutexp->alloc_id]++;
- mutexp++;
+ mutexp = (DB_MUTEX *)((uintptr_t)mutexp +
+ mtxregion->mutex_size);
if (F_ISSET(env, ENV_PRIVATE) &&
(size -= sizeof(*mutexp)) < sizeof(*mutexp)) {
mutexp =
__env_get_chunk(&mtxmgr->reginfo, &chunk, &size);
+ mutexp = ALIGNP_INC(mutexp,
+ mtxregion->stat.st_mutex_align);
}
- mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
}
__db_msg(env, "Mutex counts");
__db_msg(env, "%d\tUnallocated", counts[0]);
@@ -252,14 +266,6 @@ __mutex_print_all(env, flags)
ENV *env;
u_int32_t flags;
{
- static const FN fn[] = {
- { DB_MUTEX_ALLOCATED, "alloc" },
- { DB_MUTEX_LOCKED, "locked" },
- { DB_MUTEX_LOGICAL_LOCK, "logical" },
- { DB_MUTEX_PROCESS_ONLY, "process-private" },
- { DB_MUTEX_SELF_BLOCK, "self-block" },
- { 0, NULL }
- };
DB_MSGBUF mb, *mbp;
DB_MUTEX *mutexp;
DB_MUTEXMGR *mtxmgr;
@@ -294,37 +300,32 @@ __mutex_print_all(env, flags)
__db_msg(env, "mutex\twait/nowait, pct wait, holder, flags");
size = 0;
if (F_ISSET(env, ENV_PRIVATE)) {
- mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1;
+ mutexp = (DB_MUTEX *)((uintptr_t)mtxmgr->mutex_array +
+ mtxregion->mutex_size);
chunk = NULL;
size = __env_elem_size(env,
ROFF_TO_P(mtxregion->mutex_off_alloc));
- size -= sizeof(*mutexp);
+ size -= mtxregion->mutex_size;
} else
mutexp = MUTEXP_SET(env, 1);
for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) {
if (F_ISSET(mutexp, DB_MUTEX_ALLOCATED)) {
__db_msgadd(env, mbp, "%5lu\t", (u_long)i);
-
__mutex_print_debug_stats(env, mbp,
F_ISSET(env, ENV_PRIVATE) ?
(db_mutex_t)mutexp : i, flags);
-
- if (mutexp->alloc_id != 0)
- __db_msgadd(env, mbp,
- ", %s", __mutex_print_id(mutexp->alloc_id));
-
- __db_prflags(env, mbp, mutexp->flags, fn, " (", ")");
-
DB_MSGBUF_FLUSH(env, mbp);
}
- mutexp++;
+ mutexp = (DB_MUTEX *)((uintptr_t)mutexp +
+ mtxregion->mutex_size);
if (F_ISSET(env, ENV_PRIVATE) &&
- (size -= sizeof(*mutexp)) < sizeof(*mutexp)) {
+ (size -= mtxregion->mutex_size) < mtxregion->mutex_size) {
mutexp =
__env_get_chunk(&mtxmgr->reginfo, &chunk, &size);
+ mutexp = ALIGNP_INC(mutexp,
+ mtxregion->stat.st_mutex_align);
}
- mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
}
return (0);
@@ -332,8 +333,7 @@ __mutex_print_all(env, flags)
/*
* __mutex_print_debug_single --
- * Print mutex internal debugging statistics for a single mutex on a
- * single output line.
+ * Print mutex internal debugging statistics for a single mutex.
*
* PUBLIC: void __mutex_print_debug_single
* PUBLIC: __P((ENV *, const char *, db_mutex_t, u_int32_t));
@@ -359,8 +359,9 @@ __mutex_print_debug_single(env, tag, mutex, flags)
/*
* __mutex_print_debug_stats --
- * Print mutex internal debugging statistics, that is, the statistics
- * in the [] square brackets.
+ * Print the mutex internal debugging statistics in square bracket,s on a
+ * followed by the allocation id and flags, on single line. When MUTEX_DIAG
+ * is on and the mutex is held, append the owner's stack trace.
*
* PUBLIC: void __mutex_print_debug_stats
* PUBLIC: __P((ENV *, DB_MSGBUF *, db_mutex_t, u_int32_t));
@@ -380,6 +381,9 @@ __mutex_print_debug_stats(env, mbp, mutex, flags)
!defined(HAVE_MUTEX_PTHREADS))
int sharecount;
#endif
+#ifdef MUTEX_DIAG
+ char timestr[CTIME_BUFLEN];
+#endif
if (mutex == MUTEX_INVALID) {
__db_msgadd(env, mbp, "[!Set]");
@@ -448,6 +452,22 @@ __mutex_print_debug_stats(env, mbp, mutex, flags)
mutexp->hybrid_wait, mutexp->hybrid_wakeup);
#endif
+ if (mutexp->alloc_id != 0)
+ __db_msgadd(env,
+ mbp, ", %s", __mutex_print_id(mutexp->alloc_id));
+
+ __db_prflags(env, mbp, mutexp->flags, MutexFlagNames, " (", ")");
+#ifdef MUTEX_DIAG
+ if (mutexp->alloc_id != MTX_LOGICAL_LOCK &&
+ timespecisset(&mutexp->mutex_history.when)) {
+ __db_ctimespec(&mutexp->mutex_history.when, timestr);
+ __db_msgadd(env, mbp, "\nLocked %s", timestr);
+ if (mutexp->mutex_history.stacktext[0] != '\0')
+ __db_msgadd(env, mbp, "\n%.*s",
+ (int)sizeof(mutexp->mutex_history.stacktext) - 1,
+ mutexp->mutex_history.stacktext);
+ }
+#endif
if (LF_ISSET(DB_STAT_CLEAR))
__mutex_clear(env, mutex);
}
@@ -495,7 +515,8 @@ __mutex_print_id(alloc_id)
case MTX_TXN_COMMIT: return ("txn commit");
case MTX_TXN_MVCC: return ("txn mvcc");
case MTX_TXN_REGION: return ("txn region");
- default: return ("unknown mutex type");
+ case 0: return ("invalid 0 mutex type");
+ default: return ("unknown non-zero mutex type");
/* NOTREACHED */
}
}
@@ -577,3 +598,39 @@ __mutex_stat_print_pp(dbenv, flags)
return (__db_stat_not_built(dbenv->env));
}
#endif
+
+/*
+ * __mutex_describe
+ * Fill in a buffer with the mutex #, alloc_id, and any other
+ * characteristics which are likely to be useful for diagnostics. The
+ * destination buffer must hold at least DB_MUTEX_DESCRIBE_STRLEN bytes.
+ *
+ * PUBLIC: char *__mutex_describe __P((ENV *, db_mutex_t, char *));
+ */
+char *
+__mutex_describe(env, mutex, dest)
+ ENV *env;
+ db_mutex_t mutex;
+ char *dest;
+{
+ DB_MUTEX *mutexp;
+ DB_MSGBUF mb, *mbp;
+ const char *type;
+
+ DB_MSGBUF_INIT(&mb);
+ mbp = &mb;
+ mutexp = MUTEXP_SET(env, mutex);
+ type = F_ISSET(mutexp, DB_MUTEX_SHARED) ? "latch" : "mutex";
+#ifdef HAVE_STATISTICS
+ __db_msgadd(env, mbp, "%s %s id %ld ",
+ __mutex_print_id(mutexp->alloc_id), type, (long)mutex);
+ __db_prflags(env, mbp, mutexp->flags, MutexFlagNames, " (", ")");
+#else
+ __db_msgadd(env, mbp, "%s flags %x id %ld ",
+ type, mutexp->flags, (long)mutex);
+#endif
+ (void)snprintf(dest, DB_MUTEX_DESCRIBE_STRLEN - 1,
+ "%.*s", (int)(mbp->cur - mbp->buf), mbp->buf);
+ dest[DB_MUTEX_DESCRIBE_STRLEN - 1] = '\0';
+ return (dest);
+}
diff --git a/src/mutex/mut_stub.c b/src/mutex/mut_stub.c
index 61ecc80c..0ece9a9d 100644
--- a/src/mutex/mut_stub.c
+++ b/src/mutex/mut_stub.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -157,6 +157,16 @@ __mutex_print_debug_stats(env, mbp, mutex, flags)
}
int
+__mutex_refresh(env, mutex)
+ ENV *env;
+ db_mutex_t mutex;
+{
+ COMPQUIET(env, NULL);
+ COMPQUIET(mutex, MUTEX_INVALID);
+ return (0);
+}
+
+int
__mutex_set_align(dbenv, align)
DB_ENV *dbenv;
u_int32_t align;
diff --git a/src/mutex/mut_tas.c b/src/mutex/mut_tas.c
index 0899d237..c7cc3ea5 100644
--- a/src/mutex/mut_tas.c
+++ b/src/mutex/mut_tas.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -52,8 +52,7 @@ __db_tas_mutex_init(env, mutex, flags)
#endif
if (MUTEX_INIT(&mutexp->tas)) {
ret = __os_get_syserr();
- __db_syserr(env, ret, DB_STR("2029",
- "TAS: mutex initialize"));
+ __db_syserr(env, ret, DB_STR("2029", "TAS: mutex initialize"));
return (__os_posix_err(ret));
}
#ifdef HAVE_MUTEX_HYBRID
@@ -66,7 +65,9 @@ __db_tas_mutex_init(env, mutex, flags)
/*
* __db_tas_mutex_lock_int
- * Internal function to lock a mutex, or just try to lock it without waiting
+ * Internal function to lock a mutex, or just try to lock it without
+ * waiting. MUTEX_WAIT() passes in a timeout to allow an early exit
+ * returning DB_TIMEOUT.
*/
inline static int
__db_tas_mutex_lock_int(env, mutex, timeout, nowait)
@@ -80,13 +81,15 @@ __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
DB_MUTEXMGR *mtxmgr;
DB_MUTEXREGION *mtxregion;
DB_THREAD_INFO *ip;
- db_timespec now, timespec;
+ db_timespec now, timeout_timespec;
u_int32_t nspins;
+ u_long micros;
int ret;
-#ifdef HAVE_MUTEX_HYBRID
- const u_long micros = 0;
-#else
- u_long micros, max_micros;
+#ifdef DIAGNOSTIC
+ char buf[DB_THREADID_STRLEN];
+#endif
+#ifndef HAVE_MUTEX_HYBRID
+ u_long max_micros;
db_timeout_t time_left;
#endif
@@ -95,21 +98,23 @@ __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
return (0);
+ PANIC_CHECK(env);
+
mtxmgr = env->mutex_handle;
mtxregion = mtxmgr->reginfo.primary;
mutexp = MUTEXP_SET(env, mutex);
CHECK_MTX_THREAD(env, mutexp);
-#ifdef HAVE_STATISTICS
if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
STAT_INC(env, mutex, set_wait, mutexp->mutex_set_wait, mutex);
else
STAT_INC(env,
mutex, set_nowait, mutexp->mutex_set_nowait, mutex);
-#endif
-#ifndef HAVE_MUTEX_HYBRID
+#ifdef HAVE_MUTEX_HYBRID
+ micros = 0;
+#else
/*
* Wait 1ms initially, up to 10ms for mutexes backing logical database
* locks, and up to 25 ms for mutual exclusion data structure mutexes.
@@ -119,16 +124,15 @@ __db_tas_mutex_lock_int(env, mutex, timeout, nowait)
max_micros = F_ISSET(mutexp, DB_MUTEX_LOGICAL_LOCK) ? 10000 : 25000;
#endif
- /* Clear the ending timespec so it'll be initialed upon first need. */
+ /* Clear the ending timespec so it'll be initialized upon first need. */
if (timeout != 0)
- timespecclear(&timespec);
+ timespecclear(&timeout_timespec);
/*
- * Only check the thread state once, by initializing the thread
- * control block pointer to null. If it is not the failchk
- * thread, then ip will have a valid value subsequent times
- * in the loop.
- */
+ * Only check the thread state once, by initializing the thread
+ * control block pointer to null. If it is not the failchk thread,
+ * then ip will be valid during the subsequent times in the loop.
+ */
ip = NULL;
loop: /* Attempt to acquire the resource for N spins. */
@@ -151,16 +155,45 @@ loop: /* Attempt to acquire the resource for N spins. */
if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
ip == NULL && dbenv->is_alive(dbenv,
mutexp->pid, mutexp->tid, 0) == 0) {
+ /*
+ * The process owing the mutex is "dead" now, but it may
+ * have already released the mutex. We need to check again
+ * by going back to the top of the loop if the mutex is
+ * still held by the "dead" process. We yield 10 us to
+ * increase the likelyhood of mutexp fields being up-to-date.
+ * Set spin so we spin one more time because there isno need
+ * to spin more if the dead process owns the mutex.
+ */
+ if (nspins > 1) {
+ nspins = 2;
+ __os_yield(env, 0, 10);
+ continue;
+ }
ret = __env_set_state(env, &ip, THREAD_VERIFY);
if (ret != 0 ||
- ip->dbth_state == THREAD_FAILCHK)
- return (DB_RUNRECOVERY);
+ ip->dbth_state == THREAD_FAILCHK) {
+ /*
+ * Either we could not get the thread
+ * state or we did and found that this
+ * is the failchk thread. Return a panic
+ * code in either case, but if the
+ * failchk thread don't give more
+ * notice of the already-existing panic.
+ */
+ if (ret == 0)
+ return (USR_ERR(env,
+ DB_RUNRECOVERY));
+ else
+ return (__env_panic(env,
+ USR_ERR(env, ret)));
+ }
}
if (nowait)
- return (DB_LOCK_NOTGRANTED);
+ return (USR_ERR(env, DB_LOCK_NOTGRANTED));
/*
* Some systems (notably those with newer Intel CPUs)
* need a small pause here. [#6975]
+ * XXX Is there some better post-Pentum 4?
*/
MUTEX_PAUSE
continue;
@@ -189,9 +222,14 @@ loop: /* Attempt to acquire the resource for N spins. */
* the DB mutex unlock function.
*/
#endif
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD)) {
+ MUTEX_UNSET(&mutexp->tas);
+ return (__mutex_died(env, mutex));
+ }
+#endif
#ifdef DIAGNOSTIC
if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
- char buf[DB_THREADID_STRLEN];
__db_errx(env, DB_STR_A("2030",
"TAS lock failed: lock %ld currently in use: ID: %s",
"%ld %s"), (long)mutex,
@@ -202,6 +240,12 @@ loop: /* Attempt to acquire the resource for N spins. */
#endif
F_SET(mutexp, DB_MUTEX_LOCKED);
dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
+#if defined(MUTEX_DIAG)
+ __os_gettime(env, &mutexp->mutex_history.when, 0);
+ /* Why 3? Skip __os_stack_text, __db_tas_mutex_lock{_int,} */
+ __os_stack_text(env, mutexp->mutex_history.stacktext,
+ sizeof(mutexp->mutex_history.stacktext), 12, 3);
+#endif
#ifdef DIAGNOSTIC
/*
@@ -215,20 +259,20 @@ loop: /* Attempt to acquire the resource for N spins. */
}
/*
- * We need to wait for the lock to become available.
- * Possibly setup timeouts if this is the first wait, or
- * check expiration times for the second and subsequent waits.
+ * We need to wait for the lock to become available. Setup timeouts if
+ * this is the first wait, or the failchk timeout is smaller than the
+ * wait timeout. Check expiration times for subsequent waits.
*/
if (timeout != 0) {
/* Set the expiration time if this is the first sleep . */
- if (!timespecisset(&timespec))
- __clock_set_expires(env, &timespec, timeout);
+ if (!timespecisset(&timeout_timespec))
+ __clock_set_expires(env, &timeout_timespec, timeout);
else {
timespecclear(&now);
- if (__clock_expired(env, &now, &timespec))
- return (DB_TIMEOUT);
+ if (__clock_expired(env, &now, &timeout_timespec))
+ return (USR_ERR(env, DB_TIMEOUT));
#ifndef HAVE_MUTEX_HYBRID
- timespecsub(&now, &timespec);
+ timespecsub(&now, &timeout_timespec);
DB_TIMESPEC_TO_TIMEOUT(time_left, &now, 0);
time_left = timeout - time_left;
if (micros > time_left)
@@ -253,13 +297,21 @@ loop: /* Attempt to acquire the resource for N spins. */
goto loop;
/* Wait until the mutex can be obtained exclusively or it times out. */
if ((ret = __db_hybrid_mutex_suspend(env,
- mutex, timeout == 0 ? NULL : &timespec, TRUE)) != 0)
+ mutex, timeout == 0 ? NULL : &timeout_timespec, TRUE)) != 0) {
+ DB_DEBUG_MSG(env,
+ "mutex_lock %ld suspend returned %d", (u_long)mutex, ret);
return (ret);
+ }
#else
if ((micros <<= 1) > max_micros)
micros = max_micros;
#endif
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ dbenv->mutex_failchk_timeout != 0)
+ return (__mutex_died(env, mutex));
+#endif
/*
* We're spinning. The environment might be hung, and somebody else
* has already recovered it. The first thing recovery does is panic
@@ -291,7 +343,7 @@ __db_tas_mutex_lock(env, mutex, timeout)
* Try to exclusively lock a mutex without ever blocking - ever!
*
* Returns 0 on success,
- * DB_LOCK_NOTGRANTED on timeout
+ * DB_LOCK_NOTGRANTED if it is busy.
* Possibly DB_RUNRECOVERY if DB_ENV_FAILCHK or panic.
*
* This will work for DB_MUTEX_SHARED, though it always tries
@@ -324,9 +376,9 @@ __db_tas_mutex_readlock_int(env, mutex, nowait)
DB_MUTEXMGR *mtxmgr;
DB_MUTEXREGION *mtxregion;
DB_THREAD_INFO *ip;
- int lock;
+ MUTEX_STATE *state;
+ int lock, ret;
u_int32_t nspins;
- int ret;
#ifndef HAVE_MUTEX_HYBRID
u_long micros, max_micros;
#endif
@@ -342,14 +394,17 @@ __db_tas_mutex_readlock_int(env, mutex, nowait)
CHECK_MTX_THREAD(env, mutexp);
DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED));
-#ifdef HAVE_STATISTICS
if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
STAT_INC(env,
mutex, set_rd_wait, mutexp->mutex_set_rd_wait, mutex);
else
STAT_INC(env,
mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex);
-#endif
+
+ state = NULL;
+ if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
+ mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
+ return (ret);
#ifndef HAVE_MUTEX_HYBRID
/*
@@ -375,25 +430,52 @@ loop: /* Attempt to acquire the resource for N spins. */
MUTEX_PAUSE
continue;
}
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ (void)atomic_compare_exchange(env,
+ &mutexp->sharecount, lock, lock - 1);
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
+ return (__mutex_died(env, mutex));
+ }
+#endif
MEMBAR_ENTER();
+#ifdef MUTEX_DIAG
+ __os_gettime(env, &mutexp->mutex_history.when, 0);
+ __os_stack_text(env, mutexp->mutex_history.stacktext,
+ sizeof(mutexp->mutex_history.stacktext), 12, 3);
+#endif
/* For shared latches the threadid is the last requestor's id.
*/
dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
+ if (state != NULL)
+ state->action = MUTEX_ACTION_SHARED;
return (0);
}
- /*
- * Waiting for the latched must be avoided when it could allow a
- * 'failchk'ing thread to hang.
- */
+ /* Waiting for the latch must be avoided if it could hang up failchk. */
if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
dbenv->is_alive(dbenv, mutexp->pid, mutexp->tid, 0) == 0) {
ret = __env_set_state(env, &ip, THREAD_VERIFY);
- if (ret != 0 || ip->dbth_state == THREAD_FAILCHK)
- return (DB_RUNRECOVERY);
+ if (ret != 0 || ip->dbth_state == THREAD_FAILCHK) {
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
+ if (ret == 0)
+ return (USR_ERR(env, DB_RUNRECOVERY));
+ else
+ return (__env_panic(env, USR_ERR(env, ret)));
+ }
}
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD)) {
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
+ return (__mutex_died(env, mutex));
+ }
+#endif
/*
* It is possible to spin out when the latch is just shared, due to
@@ -403,6 +485,8 @@ loop: /* Attempt to acquire the resource for N spins. */
if (nowait) {
if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
goto loop;
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
return (DB_LOCK_NOTGRANTED);
}
@@ -419,8 +503,11 @@ loop: /* Attempt to acquire the resource for N spins. */
if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
goto loop;
/* Wait until the mutex is no longer exclusively locked. */
- if ((ret = __db_hybrid_mutex_suspend(env, mutex, NULL, FALSE)) != 0)
+ if ((ret = __db_hybrid_mutex_suspend(env, mutex, NULL, FALSE)) != 0) {
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
return (ret);
+ }
#else
PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);
__os_yield(env, 0, micros);
@@ -486,17 +573,13 @@ __db_tas_mutex_tryreadlock(env, mutex)
*/
int
__db_tas_mutex_unlock(env, mutex)
- ENV *env;
+ ENV *env;
db_mutex_t mutex;
{
DB_ENV *dbenv;
DB_MUTEX *mutexp;
-#ifdef HAVE_MUTEX_HYBRID
int ret;
-#ifdef MUTEX_DIAG
- int waiters;
-#endif
-#endif
+ char description[DB_MUTEX_DESCRIBE_STRLEN];
#ifdef HAVE_SHARED_LATCHES
int sharecount;
#endif
@@ -506,14 +589,14 @@ __db_tas_mutex_unlock(env, mutex)
return (0);
mutexp = MUTEXP_SET(env, mutex);
-#if defined(HAVE_MUTEX_HYBRID) && defined(MUTEX_DIAG)
- waiters = mutexp->wait;
-#endif
#if defined(DIAGNOSTIC)
#if defined(HAVE_SHARED_LATCHES)
if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
if (atomic_read(&mutexp->sharecount) == 0) {
+ if (PANIC_ISSET(env))
+ return (__env_panic(env,
+ USR_ERR(env, DB_RUNRECOVERY)));
__db_errx(env, DB_STR_A("2031",
"shared unlock %ld already unlocked", "%ld"),
(long)mutex);
@@ -522,16 +605,39 @@ __db_tas_mutex_unlock(env, mutex)
} else
#endif
if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
+ if (PANIC_ISSET(env))
+ return (__env_panic(env,
+ USR_ERR(env, DB_RUNRECOVERY)));
__db_errx(env, DB_STR_A("2032",
"unlock %ld already unlocked", "%ld"), (long)mutex);
return (__env_panic(env, EACCES));
}
#endif
+#ifdef MUTEX_DIAG
+ timespecclear(&mutexp->mutex_history.when);
+#endif
#ifdef HAVE_SHARED_LATCHES
if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
sharecount = atomic_read(&mutexp->sharecount);
- /*MUTEX_MEMBAR(mutexp->sharecount);*/ /* XXX why? */
+ /*
+ * Many code paths contain sequence of the form
+ * MUTEX_LOCK(); ret = function(); MUTEX_UNLOCK();
+ * If function() sees or causes a panic while it had temporarily
+ * unlocked the mutex it won't be locked anymore. Don't confuse
+ * the error by generating spurious follow-on messages.
+ */
+ if (sharecount == 0) {
+was_not_locked:
+ if (!PANIC_ISSET(env)) {
+ __db_errx(env, DB_STR_A("2070",
+ "Shared unlock %s: already unlocked", "%s"),
+ __mutex_describe(env, mutex, description));
+ return (__env_panic(env,
+ USR_ERR(env, DB_RUNRECOVERY)));
+ }
+ return (__env_panic(env, EACCES));
+ }
if (sharecount == MUTEX_SHARE_ISEXCLUSIVE) {
F_CLR(mutexp, DB_MUTEX_LOCKED);
/* Flush flag update before zeroing count */
@@ -542,12 +648,17 @@ __db_tas_mutex_unlock(env, mutex)
MEMBAR_EXIT();
sharecount = atomic_dec(env, &mutexp->sharecount);
DB_ASSERT(env, sharecount >= 0);
+ if (env->thr_hashtab != NULL &&
+ (ret = __mutex_record_unlock(env, mutex)) != 0)
+ return (ret);
if (sharecount > 0)
return (0);
}
} else
#endif
{
+ if (!F_ISSET(mutexp, DB_MUTEX_LOCKED))
+ goto was_not_locked;
F_CLR(mutexp, DB_MUTEX_LOCKED);
MUTEX_UNSET(&mutexp->tas);
}
@@ -559,17 +670,10 @@ __db_tas_mutex_unlock(env, mutex)
#endif
/* Prevent the load of wait from being hoisted before MUTEX_UNSET */
- MUTEX_MEMBAR(mutexp->flags);
+ (void)MUTEX_MEMBAR(mutexp->flags);
if (mutexp->wait &&
(ret = __db_pthread_mutex_unlock(env, mutex)) != 0)
return (ret);
-
-#ifdef MUTEX_DIAG
- if (mutexp->wait)
- printf("tas_unlock %ld %x waiters! busy %x waiters %d/%d\n",
- mutex, pthread_self(),
- MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait);
-#endif
#endif
return (0);
diff --git a/src/mutex/mut_win32.c b/src/mutex/mut_win32.c
index 07d5a8dd..270e03fb 100644
--- a/src/mutex/mut_win32.c
+++ b/src/mutex/mut_win32.c
@@ -1,7 +1,7 @@
/*
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2002, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -92,6 +92,9 @@ __db_win32_mutex_lock_int(env, mutex, timeout, wait)
db_timespec now, tempspec, timeoutspec;
db_timeout_t time_left;
int ret;
+#ifdef DIAGNOSTIC
+ char buf[DB_THREADID_STRLEN];
+#endif
#ifdef MUTEX_DIAG
LARGE_INTEGER now;
#endif
@@ -143,8 +146,10 @@ loop: /* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
mutexp->pid, mutexp->tid, 0) == 0) {
ret = __env_set_state(env, &ip, THREAD_VERIFY);
if (ret != 0 ||
- ip->dbth_state == THREAD_FAILCHK)
- return (DB_RUNRECOVERY);
+ ip->dbth_state == THREAD_FAILCHK) {
+ ret = DB_RUNRECOVERY;
+ goto failed;
+ }
}
if (!wait)
return (DB_LOCK_NOTGRANTED);
@@ -155,15 +160,20 @@ loop: /* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
MUTEX_PAUSE
continue;
}
-
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD)) {
+ MUTEX_UNSET(&mutexp->tas);
+ goto died;
+ }
+#endif
#ifdef DIAGNOSTIC
if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
- char buf[DB_THREADID_STRLEN];
__db_errx(env, DB_STR_A("2003",
"Win32 lock failed: mutex already locked by %s",
"%s"), dbenv->thread_id_string(dbenv,
mutexp->pid, mutexp->tid, buf));
- return (__env_panic(env, EACCES));
+ ret = __env_panic(env, EACCES);
+ goto failed;
}
#endif
F_SET(mutexp, DB_MUTEX_LOCKED);
@@ -179,11 +189,12 @@ loop: /* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
CloseHandle(event);
InterlockedDecrement(&mutexp->nwaiters);
#ifdef MUTEX_DIAG
+ /* "ret" was set by WaitForSingleObject(). */
if (ret != WAIT_OBJECT_0) {
QueryPerformanceCounter(&diag_now);
printf(DB_STR_A("2004",
- "[%I64d]: Lost signal on mutex %p, "
- "id %d, ms %d\n", "%I64d %p %d %d"),
+ "[%lld]: Lost signal on mutex %p, "
+ "id %d, ms %d\n", "%lld %p %d %d"),
diag_now.QuadPart, mutexp, mutexp->id, ms);
}
#endif
@@ -210,11 +221,8 @@ loop: /* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
if (timeout != 0) {
timespecclear(&now);
if (__clock_expired(env, &now, &timeoutspec)) {
- if (event != NULL) {
- CloseHandle(event);
- InterlockedDecrement(&mutexp->nwaiters);
- }
- return (DB_TIMEOUT);
+ ret = DB_TIMEOUT;
+ goto failed;
}
/* Reduce the event wait if the timeout would happen first. */
tempspec = timeoutspec;
@@ -228,24 +236,41 @@ loop: /* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
#ifdef MUTEX_DIAG
QueryPerformanceCounter(&diag_now);
printf(DB_STR_A("2005",
- "[%I64d]: Waiting on mutex %p, id %d\n",
- "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
+ "[%lld]: Waiting on mutex %p, id %d\n",
+ "%lld %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
#endif
InterlockedIncrement(&mutexp->nwaiters);
- if ((ret = get_handle(env, mutexp, &event)) != 0)
- goto err;
+ if ((ret = get_handle(env, mutexp, &event)) != 0) {
+ InterlockedDecrement(&mutexp->nwaiters);
+ goto syserr;
+ }
}
if ((ret = WaitForSingleObject(event, ms)) == WAIT_FAILED) {
ret = __os_get_syserr();
- goto err;
+ goto syserr;
}
if ((ms <<= 1) > MS_PER_SEC)
ms = MS_PER_SEC;
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+died:
+ ret = __mutex_died(env, mutex);
+ goto failed;
+ }
+#endif
PANIC_CHECK(env);
goto loop;
-err: __db_syserr(env, ret, DB_STR("2006", "Win32 lock failed"));
+failed:
+ if (event != NULL) {
+ CloseHandle(event);
+ InterlockedDecrement(&mutexp->nwaiters);
+ }
+ return (ret);
+
+syserr: __db_syserr(env, ret, DB_STR("2006", "Win32 lock failed"));
return (__env_panic(env, __os_posix_err(ret)));
}
@@ -266,6 +291,12 @@ __db_win32_mutex_init(env, mutex, flags)
mutexp = MUTEXP_SET(env, mutex);
mutexp->id = ((getpid() & 0xffff) << 16) ^ P_TO_UINT32(mutexp);
F_SET(mutexp, flags);
+ /*
+ * See WINCE_ATOMIC_MAGIC definition for details.
+ * Use sharecount, because the value just needs to be a db_atomic_t
+ * memory mapped onto the same page as those being Interlocked*.
+ */
+ WINCE_ATOMIC_MAGIC(&mutexp->sharecount);
return (0);
}
@@ -315,9 +346,11 @@ __db_win32_mutex_readlock_int(env, mutex, nowait)
DB_MUTEXMGR *mtxmgr;
DB_MUTEXREGION *mtxregion;
HANDLE event;
+ MUTEX_STATE *state;
u_int32_t nspins;
- int ms, ret;
- long exch_ret, mtx_val;
+ int max_ms, ms, ret;
+ long mtx_val;
+
#ifdef MUTEX_DIAG
LARGE_INTEGER diag_now;
#endif
@@ -342,11 +375,23 @@ __db_win32_mutex_readlock_int(env, mutex, nowait)
event = NULL;
ms = 50;
ret = 0;
+
+ state = NULL;
+ if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
+ mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
+ return (ret);
+#ifdef HAVE_FAILCHK_BROADCAST
/*
- * This needs to be initialized, since if mutexp->tas
- * is write locked on the first pass, it needs a value.
+ * Limit WaitForSingleObject() sleeps to at most the failchk timeout,
+ * and least 1 millisecond. When failchk broadcasting is not
+ * supported check at least every second.
*/
- exch_ret = 0;
+ if (dbenv->mutex_failchk_timeout != 0 &&
+ (max_ms = (dbenv->mutex_failchk_timeout / US_PER_MS)) == 0)
+ max_ms = 1;
+ else
+#endif
+ max_ms = MS_PER_SEC;
loop: /* Attempt to acquire the resource for N spins. */
for (nspins =
@@ -357,9 +402,10 @@ loop: /* Attempt to acquire the resource for N spins. */
*/
retry: mtx_val = atomic_read(&mutexp->sharecount);
if (mtx_val == MUTEX_SHARE_ISEXCLUSIVE) {
- if (nowait)
- return (DB_LOCK_NOTGRANTED);
-
+ if (nowait) {
+ ret = DB_LOCK_NOTGRANTED;
+ goto failed;
+ }
continue;
} else if (!atomic_compare_exchange(env, &mutexp->sharecount,
mtx_val, mtx_val + 1)) {
@@ -370,6 +416,15 @@ retry: mtx_val = atomic_read(&mutexp->sharecount);
MUTEX_PAUSE
goto retry;
}
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ InterlockedDecrement(
+ (interlocked_val)&mutexp->sharecount);
+ ret = __mutex_died(env, mutex);
+ goto failed;
+ }
+#endif
#ifdef HAVE_STATISTICS
if (event == NULL)
@@ -384,12 +439,14 @@ retry: mtx_val = atomic_read(&mutexp->sharecount);
if (ret != WAIT_OBJECT_0) {
QueryPerformanceCounter(&diag_now);
printf(DB_STR_A("2007",
- "[%I64d]: Lost signal on mutex %p, "
- "id %d, ms %d\n", "%I64d %p %d %d"),
+ "[%lld]: Lost signal on mutex %p, "
+ "id %d, ms %d\n", "%lld %p %d %d"),
diag_now.QuadPart, mutexp, mutexp->id, ms);
}
#endif
}
+ if (state != NULL)
+ state->action = MUTEX_ACTION_SHARED;
#ifdef DIAGNOSTIC
/*
@@ -404,17 +461,17 @@ retry: mtx_val = atomic_read(&mutexp->sharecount);
}
/*
- * Yield the processor; wait 50 ms initially, up to 1 second. This
- * loop is needed to work around a race where the signal from the
- * unlocking thread gets lost. We start at 50 ms because it's unlikely
- * to happen often and we want to avoid wasting CPU.
+ * Yield the processor; wait 50 ms initially, up to 1 second or the
+ * failchk timeout. This loop works around a race where the signal from
+ * the unlocking thread gets lost. We start at 50 ms because it's
+ * unlikely to happen often and we want to avoid wasting CPU.
*/
if (event == NULL) {
#ifdef MUTEX_DIAG
QueryPerformanceCounter(&diag_now);
printf(DB_STR_A("2008",
- "[%I64d]: Waiting on mutex %p, id %d\n",
- "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
+ "[%lld]: Waiting on mutex %p, id %d\n",
+ "%lld %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
#endif
InterlockedIncrement(&mutexp->nwaiters);
if ((ret = get_handle(env, mutexp, &event)) != 0)
@@ -424,12 +481,32 @@ retry: mtx_val = atomic_read(&mutexp->sharecount);
ret = __os_get_syserr();
goto err;
}
- if ((ms <<= 1) > MS_PER_SEC)
- ms = MS_PER_SEC;
+
+#ifdef HAVE_FAILCHK_BROADCAST
+ if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
+ !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
+ (void)atomic_compare_exchange(env,
+ &mutexp->sharecount, mtx_val, mtx_val - 1);
+ ret = __mutex_died(env, mutex);
+ goto failed;
+ }
+#endif
PANIC_CHECK(env);
+
+ if ((ms <<= 1) > max_ms)
+ ms = max_ms;
goto loop;
+failed:
+ if (event != NULL) {
+ CloseHandle(event);
+ InterlockedDecrement(&mutexp->nwaiters);
+ }
+ if (state != NULL)
+ state->action = MUTEX_ACTION_UNLOCKED;
+ return (ret);
+
err: __db_syserr(env, ret, DB_STR("2009",
"Win32 read lock failed"));
return (__env_panic(env, __os_posix_err(ret)));
@@ -482,7 +559,8 @@ __db_win32_mutex_unlock(env, mutex)
DB_ENV *dbenv;
DB_MUTEX *mutexp;
HANDLE event;
- int ret;
+ int ret, sharecount;
+ char description[DB_MUTEX_DESCRIBE_STRLEN];
#ifdef MUTEX_DIAG
LARGE_INTEGER diag_now;
#endif
@@ -510,6 +588,16 @@ __db_win32_mutex_unlock(env, mutex)
*/
#ifdef HAVE_SHARED_LATCHES
if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
+ sharecount = atomic_read(&mutexp->sharecount);
+ if (sharecount == 0) {
+ if (!PANIC_ISSET(env)) {
+ __db_errx(env, DB_STR_A("2071",
+ "Shared unlock %s: already unlocked", "%s"),
+ __mutex_describe(env, mutex, description));
+ return (DB_RUNRECOVERY);
+ }
+ return (__env_panic(env, EACCES));
+ }
if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
F_CLR(mutexp, DB_MUTEX_LOCKED);
if ((ret = InterlockedExchange(
@@ -519,12 +607,26 @@ __db_win32_mutex_unlock(env, mutex)
ret = DB_RUNRECOVERY;
goto err;
}
- } else if (InterlockedDecrement(
- (interlocked_val)(&atomic_read(&mutexp->sharecount))) > 0)
- return (0);
+ } else {
+ if (env->thr_hashtab != NULL &&
+ (ret = __mutex_record_unlock(env, mutex)) != 0)
+ return (ret);
+ if (InterlockedDecrement((interlocked_val)
+ (&atomic_read(&mutexp->sharecount))) > 0)
+ return (0);
+ }
} else
#endif
{
+ if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
+ if (!PANIC_ISSET(env)) {
+ __db_errx(env, DB_STR_A("2072",
+ "Unlock %s: already unlocked", "%s"),
+ __mutex_describe(env, mutex, description));
+ return (DB_RUNRECOVERY);
+ }
+ return (__env_panic(env, EACCES));
+ }
F_CLR(mutexp, DB_MUTEX_LOCKED);
MUTEX_UNSET(&mutexp->tas);
}
@@ -536,8 +638,8 @@ __db_win32_mutex_unlock(env, mutex)
#ifdef MUTEX_DIAG
QueryPerformanceCounter(&diag_now);
printf(DB_STR_A("2011",
- "[%I64d]: Signalling mutex %p, id %d\n",
- "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
+ "[%lld]: Signalling mutex %p, id %d\n",
+ "%lld %p %d"), diag_now.QuadPart, mutexp, mutexp->id);
#endif
if (!PulseEvent(event)) {
ret = __os_get_syserr();
diff --git a/src/mutex/test_mutex.c b/src/mutex/test_mutex.c
index 24c18016..d6183bdb 100644
--- a/src/mutex/test_mutex.c
+++ b/src/mutex/test_mutex.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* Standalone mutex tester for Berkeley DB mutexes.
*
@@ -13,7 +13,6 @@
#include "db_int.h"
#ifdef DB_WIN32
-#define MUTEX_THREAD_TEST 1
extern int getopt(int, char * const *, const char *);
@@ -33,29 +32,13 @@ typedef HANDLE os_thread_t;
#include <sys/wait.h>
typedef pid_t os_pid_t;
-
-/*
- * There's only one mutex implementation that can't support thread-level
- * locking: UNIX/fcntl mutexes.
- *
- * The general Berkeley DB library configuration doesn't look for the POSIX
- * pthread functions, with one exception -- pthread_yield.
- *
- * Use these two facts to decide if we're going to build with or without
- * threads.
- */
-#if !defined(HAVE_MUTEX_FCNTL) && defined(HAVE_PTHREAD_YIELD)
-#define MUTEX_THREAD_TEST 1
-
-#include <pthread.h>
-
typedef pthread_t os_thread_t;
#define os_thread_create(thrp, attr, func, arg) \
pthread_create((thrp), (attr), (func), (arg))
#define os_thread_join(thr, statusp) pthread_join((thr), (statusp))
#define os_thread_self() pthread_self()
-#endif /* HAVE_PTHREAD_YIELD */
+
#endif /* !DB_WIN32 */
#define OS_BAD_PID ((os_pid_t)-1)
@@ -76,28 +59,25 @@ typedef struct {
u_int wakeme; /* Request to awake. */
} TM;
-DB_ENV *dbenv; /* Backing environment */
+DB_ENV *dbenv; /* Backing environment. */
ENV *env;
size_t len; /* Backing data chunk size. */
+u_int alignment = 0; /* Specify mutex alignment. */
+
u_int8_t *gm_addr; /* Global mutex */
u_int8_t *lm_addr; /* Locker mutexes */
u_int8_t *tm_addr; /* Thread mutexes */
-#ifdef MUTEX_THREAD_TEST
os_thread_t *kidsp; /* Locker threads */
os_thread_t wakep; /* Wakeup thread */
-#endif
#ifndef HAVE_MMAP
u_int nprocs = 1; /* -p: Processes. */
u_int nthreads = 20; /* -t: Threads. */
-#elif MUTEX_THREAD_TEST
+#else
u_int nprocs = 5; /* -p: Processes. */
u_int nthreads = 4; /* -t: Threads. */
-#else
-u_int nprocs = 20; /* -p: Processes. */
-u_int nthreads = 1; /* -t: Threads. */
#endif
u_int maxlocks = 20; /* -l: Backing locks. */
@@ -147,8 +127,11 @@ main(argc, argv)
rtype = PARENT;
id = 0;
tmpath = argv[0];
- while ((ch = getopt(argc, argv, "l:n:p:T:t:v")) != EOF)
+ while ((ch = getopt(argc, argv, "a:l:n:p:T:t:v")) != EOF)
switch (ch) {
+ case 'a':
+ alignment = (u_int)atoi(optarg);
+ break;
case 'l':
maxlocks = (u_int)atoi(optarg);
break;
@@ -161,14 +144,6 @@ main(argc, argv)
case 't':
if ((nthreads = (u_int)atoi(optarg)) == 0)
nthreads = 1;
-#if !defined(MUTEX_THREAD_TEST)
- if (nthreads != 1) {
- fprintf(stderr,
- "%s: thread support not available or not compiled for this platform.\n",
- progname);
- return (EXIT_FAILURE);
- }
-#endif
break;
case 'T':
if (!memcmp(optarg, "locker", sizeof("locker") - 1))
@@ -242,7 +217,11 @@ main(argc, argv)
*
* Clean up from any previous runs.
*/
+#ifdef DB_WIN32
+ snprintf(cmd, sizeof(cmd), "rmdir /S /Q %s", TESTDIR);
+#else
snprintf(cmd, sizeof(cmd), "rm -rf %s", TESTDIR);
+#endif
(void)system(cmd);
snprintf(cmd, sizeof(cmd), "mkdir %s", TESTDIR);
(void)system(cmd);
@@ -292,8 +271,8 @@ main(argc, argv)
/* Wait for all lockers to exit. */
if ((err = os_wait(pids, nprocs)) != 0) {
- fprintf(stderr, "%s: locker wait failed with %d\n",
- progname, err);
+ fprintf(stderr, "%s: locker wait failed with %s\n",
+ progname, db_strerror(err));
goto fail;
}
@@ -357,7 +336,6 @@ int
locker_start(id)
u_long id;
{
-#if defined(MUTEX_THREAD_TEST)
u_int i;
int err;
@@ -378,17 +356,13 @@ locker_start(id)
return (1);
}
return (0);
-#else
- return (run_lthread((void *)id) == NULL ? 0 : 1);
-#endif
}
int
locker_wait()
{
-#if defined(MUTEX_THREAD_TEST)
u_int i;
- void *retp;
+ void *retp = NULL;
/* Wait for the threads to exit. */
for (i = 0; i < nthreads; i++) {
@@ -400,7 +374,6 @@ locker_wait()
}
}
free(kidsp);
-#endif
return (0);
}
@@ -414,11 +387,7 @@ run_lthread(arg)
int err, i;
id = (u_long)arg;
-#if defined(MUTEX_THREAD_TEST)
tid = (u_long)os_thread_self();
-#else
- tid = 0;
-#endif
printf("Locker: ID %03lu (PID: %lu; TID: %lx)\n",
id, (u_long)getpid(), tid);
@@ -534,7 +503,6 @@ int
wakeup_start(id)
u_long id;
{
-#if defined(MUTEX_THREAD_TEST)
int err;
/*
@@ -547,16 +515,12 @@ wakeup_start(id)
return (1);
}
return (0);
-#else
- return (run_wthread((void *)id) == NULL ? 0 : 1);
-#endif
}
int
wakeup_wait()
{
-#if defined(MUTEX_THREAD_TEST)
- void *retp;
+ void *retp = NULL;
/*
* A file is created when the wakeup thread is no longer needed.
@@ -567,7 +531,6 @@ wakeup_wait()
"%s: wakeup thread exited with error\n", progname);
return (1);
}
-#endif
return (0);
}
@@ -586,11 +549,7 @@ run_wthread(arg)
id = (u_long)arg;
quitcheck = 0;
-#if defined(MUTEX_THREAD_TEST)
tid = (u_long)os_thread_self();
-#else
- tid = 0;
-#endif
printf("Wakeup: ID %03lu (PID: %lu; TID: %lx)\n",
id, (u_long)getpid(), tid);
@@ -683,6 +642,12 @@ tm_env_init()
home = TESTDIR;
if (nthreads != 1)
flags |= DB_THREAD;
+ if (alignment != 0 &&
+ (ret = dbenv->mutex_set_align(dbenv, alignment)) != 0) {
+ dbenv->err(dbenv, ret, "set_align(%d): %s", alignment, home);
+ return (1);
+ }
+
if ((ret = dbenv->open(dbenv, home, flags, 0)) != 0) {
dbenv->err(dbenv, ret, "environment open: %s", home);
return (1);
@@ -748,8 +713,10 @@ tm_mutex_init()
if (verbose)
printf("\n");
- if (verbose)
+ if (verbose) {
+ (void)dbenv->mutex_stat_print(dbenv, DB_STAT_ALL);
printf("Allocate %d per-lock mutexes: ", maxlocks);
+ }
for (i = 0; i < maxlocks; ++i) {
mp = (TM *)(lm_addr + i * sizeof(TM));
if ((err = dbenv->mutex_alloc(dbenv, 0, &mp->mutex)) != 0) {
@@ -930,7 +897,7 @@ int
usage()
{
fprintf(stderr, "usage: %s %s\n\t%s\n", progname,
- "[-v] [-l maxlocks]",
+ "[-a alignment] [-v] [-l maxlocks]",
"[-n locks] [-p procs] [-T locker=ID|wakeup=ID] [-t threads]");
return (EXIT_FAILURE);
}
diff --git a/src/mutex/uts4_cc.s b/src/mutex/uts4_cc.s
index 4f59e9c8..76eeed6c 100644
--- a/src/mutex/uts4_cc.s
+++ b/src/mutex/uts4_cc.s
@@ -1,6 +1,6 @@
/ See the file LICENSE for redistribution information.
/
- / Copyright (c) 1997, 2012 Oracle and/or its affiliates. All rights reserved.
+ / Copyright (c) 1997, 2015 Oracle and/or its affiliates. All rights reserved.
/
/ $Id$
/