/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1999, 2015 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/lock.h"

/*
 * This is where we load in architecture/compiler specific mutex code.
 */
#define	LOAD_ACTUAL_MUTEX_CODE

#ifdef HAVE_MUTEX_SOLARIS_LWP
#define	pthread_cond_destroy(x)		0
#define	pthread_cond_signal		_lwp_cond_signal
#define	pthread_cond_broadcast		_lwp_cond_broadcast
#define	pthread_cond_wait		_lwp_cond_wait
#define	pthread_mutex_destroy(x)	0
#define	pthread_mutex_lock		_lwp_mutex_lock
#define	pthread_mutex_trylock		_lwp_mutex_trylock
#define	pthread_mutex_unlock		_lwp_mutex_unlock
#endif
#ifdef HAVE_MUTEX_UI_THREADS
#define	pthread_cond_destroy		cond_destroy
#define	pthread_cond_signal		cond_signal
#define	pthread_cond_broadcast		cond_broadcast
#define	pthread_cond_wait		cond_wait
#define	pthread_mutex_destroy		mutex_destroy
#define	pthread_mutex_lock		mutex_lock
#define	pthread_mutex_trylock		mutex_trylock
#define	pthread_mutex_unlock		mutex_unlock
#endif

/*
 * According to HP-UX engineers contacted by Netscape,
 * pthread_mutex_unlock() will occasionally return EFAULT for no good reason
 * on mutexes in shared memory regions, and the correct caller behavior
 * is to try again.  Do so, up to EFAULT_RETRY_ATTEMPTS consecutive times.
 * Note that we don't bother to restrict this to HP-UX;
 * it should be harmless elsewhere. [#2471]
 */
#define	EFAULT_RETRY_ATTEMPTS	5
#define	RETRY_ON_EFAULT(func_invocation, ret) do {	\
	int i;						\
	i = EFAULT_RETRY_ATTEMPTS;			\
	do {						\
		RET_SET((func_invocation), ret);	\
	} while (ret == EFAULT && --i > 0);		\
} while (0)

/*
 * IBM's MVS pthread mutex implementation returns -1 and sets errno rather than
 * returning errno itself.  As -1 is not a valid errno value, assume functions
 * returning -1 have set errno.  If they haven't (errno is still 0), fall
 * back to an arbitrary error value, EAGAIN.
 */
#define	RET_SET(f, ret) do {						\
	if (((ret) = (f)) == -1 && ((ret) = errno) == 0)		\
		(ret) = EAGAIN;						\
} while (0)
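
/*
 * Illustrative only: a typical call site in this file composes the two
 * macros above, e.g.
 *
 *	RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret);
 *
 * which retries the unlock up to EFAULT_RETRY_ATTEMPTS times, and each
 * attempt goes through RET_SET so that a -1/errno-style return (as on
 * MVS) is normalized into a positive errno value in 'ret'.
 */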

/*
 * !!!
 * Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME  -- out
 * of sheer paranoia, check both ETIME and ETIMEDOUT.  We believe this happens
 * when the application uses SIGALRM for some purpose, e.g., the C library sleep
 * call, and Solaris delivers the signal to the wrong LWP.
 */
#ifdef ETIME
#define	ETIME_TO_ETIMEDOUT(ret)	((ret) == ETIME ? ETIMEDOUT : (ret))
#else
#define	ETIME_TO_ETIMEDOUT(ret)	(ret)
#endif

/*
 * __db_pthread_mutex_init --
 *	Initialize a pthread mutex: either a native one or
 *	just the mutex for block/wakeup of a hybrid test-and-set mutex.
 *
 * PUBLIC: int __db_pthread_mutex_init __P((ENV *, db_mutex_t, u_int32_t));
 */
int
__db_pthread_mutex_init(env, mutex, flags)
	ENV *env;
	db_mutex_t mutex;
	u_int32_t flags;
{
	DB_MUTEX *mutexp;
	int ret;

	mutexp = MUTEXP_SET(env, mutex);
	ret = 0;

#ifndef HAVE_MUTEX_HYBRID
	/* Can't have self-blocking shared latches.  */
	DB_ASSERT(env, !LF_ISSET(DB_MUTEX_SELF_BLOCK) ||
	    !LF_ISSET(DB_MUTEX_SHARED));
#endif

#ifdef HAVE_MUTEX_PTHREADS
	{
#ifndef HAVE_MUTEX_THREAD_ONLY
	pthread_condattr_t condattr;
	pthread_mutexattr_t mutexattr;
#endif
	pthread_condattr_t *condattrp = NULL;
	pthread_mutexattr_t *mutexattrp = NULL;

#ifndef HAVE_MUTEX_HYBRID
	if (LF_ISSET(DB_MUTEX_SHARED)) {
#if defined(HAVE_SHARED_LATCHES)
		pthread_rwlockattr_t rwlockattr, *rwlockattrp = NULL;
#ifndef HAVE_MUTEX_THREAD_ONLY
		if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
			RET_SET(pthread_rwlockattr_init(&rwlockattr), ret);
			if (ret != 0)
				goto err;
			RET_SET(pthread_rwlockattr_setpshared(
			    &rwlockattr, PTHREAD_PROCESS_SHARED), ret);
			rwlockattrp = &rwlockattr;
		}
#endif

		if (ret == 0)
			RET_SET(pthread_rwlock_init(&mutexp->u.rwlock,
			    rwlockattrp), ret);
		if (rwlockattrp != NULL)
			(void)pthread_rwlockattr_destroy(rwlockattrp);

		F_SET(mutexp, DB_MUTEX_SHARED);
		/* For rwlocks, we're done - cannot use the mutex or cond */
		goto err;
#endif
	}
#endif
#ifndef HAVE_MUTEX_THREAD_ONLY
	if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
		RET_SET(pthread_mutexattr_init(&mutexattr), ret);
		if (ret != 0)
			goto err;
		RET_SET(pthread_mutexattr_setpshared(
		    &mutexattr, PTHREAD_PROCESS_SHARED), ret);
		mutexattrp = &mutexattr;
	}
#endif

	if (ret == 0)
		RET_SET(
		    pthread_mutex_init(&mutexp->u.m.mutex, mutexattrp), ret);

	if (mutexattrp != NULL)
		(void)pthread_mutexattr_destroy(mutexattrp);
	if (ret != 0)
		goto err;
	if (LF_ISSET(DB_MUTEX_SELF_BLOCK)) {
#ifndef HAVE_MUTEX_THREAD_ONLY
		if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
			RET_SET(pthread_condattr_init(&condattr), ret);
			if (ret != 0)
				goto err;

			condattrp = &condattr;
			RET_SET(pthread_condattr_setpshared(
			    &condattr, PTHREAD_PROCESS_SHARED), ret);
		}
#endif

		if (ret == 0)
			RET_SET(pthread_cond_init(
			    &mutexp->u.m.cond, condattrp), ret);

		F_SET(mutexp, DB_MUTEX_SELF_BLOCK);
		if (condattrp != NULL)
			(void)pthread_condattr_destroy(condattrp);
	}

	}
#endif
#ifdef HAVE_MUTEX_SOLARIS_LWP
	/*
	 * XXX
	 * Gcc complains about missing braces in the static initializations of
	 * lwp_cond_t and lwp_mutex_t structures because the structures contain
	 * sub-structures/unions and the Solaris include file that defines the
	 * initialization values doesn't have surrounding braces.  There's not
	 * much we can do.
	 */
	if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
		static lwp_mutex_t mi = DEFAULTMUTEX;

		mutexp->mutex = mi;
	} else {
		static lwp_mutex_t mi = SHAREDMUTEX;

		mutexp->mutex = mi;
	}
	if (LF_ISSET(DB_MUTEX_SELF_BLOCK)) {
		if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) {
			static lwp_cond_t ci = DEFAULTCV;

			mutexp->cond = ci;
		} else {
			static lwp_cond_t ci = SHAREDCV;

			mutexp->cond = ci;
		}
		F_SET(mutexp, DB_MUTEX_SELF_BLOCK);
	}
#endif
#ifdef HAVE_MUTEX_UI_THREADS
	{
	int type;

	type = LF_ISSET(DB_MUTEX_PROCESS_ONLY) ? USYNC_THREAD : USYNC_PROCESS;

	ret = mutex_init(&mutexp->mutex, type, NULL);
	if (ret == 0 && LF_ISSET(DB_MUTEX_SELF_BLOCK)) {
		ret = cond_init(&mutexp->cond, type, NULL);

		F_SET(mutexp, DB_MUTEX_SELF_BLOCK);
	}}
#endif

err:	if (ret != 0) {
		__db_err(env, ret, DB_STR("2021",
		    "unable to initialize mutex"));
	}
	return (ret);
}
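
/*
 * The RET_SET_PTHREAD_{LOCK,TRYLOCK,TIMEDLOCK} macros used below come
 * from the mutex headers rather than this file.  Presumably they select
 * the pthread call matching the latch type; a rough, illustrative sketch
 * (not the real definitions):
 *
 *	#define	RET_SET_PTHREAD_LOCK(mutexp, ret) do {			\
 *		if (F_ISSET(mutexp, DB_MUTEX_SHARED))			\
 *			RET_SET(pthread_rwlock_wrlock(			\
 *			    &(mutexp)->u.rwlock), ret);			\
 *		else							\
 *			RET_SET(pthread_mutex_lock(			\
 *			    &(mutexp)->u.m.mutex), ret);		\
 *	} while (0)
 */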

/*
 * __db_pthread_mutex_prep
 *	Prepare to use a pthread-based DB_MUTEX.
 *
 *	This exclusively locks a DB_MUTEX's pthread_mutex_t or pthread_rwlock_t,
 *	before locking, unlocking, or waiting for the DB mutex to become
 *	available in the requested mode (exclusive == 1, shared == 0).
 *
 *	Test for failchk concerns here too, to avoid hanging on a dead pid/tid.
 */
inline static int
__db_pthread_mutex_prep(env, mutex, mutexp, exclusive)
	ENV *env;
	db_mutex_t mutex;
	DB_MUTEX *mutexp;
	int exclusive;
{
	DB_ENV *dbenv;
	DB_THREAD_INFO *ip;
#ifdef HAVE_FAILCHK_BROADCAST
	db_timespec timespec;
#endif
	int ret;

	dbenv = env->dbenv;
	PERFMON4(env,
	    mutex, suspend, mutex, exclusive, mutexp->alloc_id, mutexp);
	if (F_ISSET(dbenv, DB_ENV_FAILCHK)) {
		for (;;) {
			RET_SET_PTHREAD_TRYLOCK(mutexp, ret);
			if (ret != EBUSY)
				break;
			if (dbenv->is_alive(dbenv,
			    mutexp->pid, mutexp->tid, 0) == 0) {
				ret = __env_set_state(env, &ip, THREAD_VERIFY);
				if (ret != 0 ||
				    ip->dbth_state == THREAD_FAILCHK) {
					ret = DB_RUNRECOVERY;
					break;
				}
				/*
				 * Some thread other than the true
				 * FAILCHK thread in this process is
				 * asking for the mutex held by the
				 * dead process/thread.  We will block
				 * here until someone else does the
				 * cleanup.  Same behavior as if we
				 * hadn't gone down the 'if
				 * DB_ENV_FAILCHK' path to start with.
				 */
				goto lockit;
			}
			__os_yield(env, 0, 10);
		}
	} else {
lockit:
#ifdef HAVE_FAILCHK_BROADCAST
		if (dbenv->mutex_failchk_timeout != 0) {
			timespecclear(&timespec);
			__clock_set_expires(env,
			    &timespec, dbenv->mutex_failchk_timeout);
			do {
				RET_SET_PTHREAD_TIMEDLOCK(mutexp,
				    (struct timespec *)&timespec, ret);
				ret = ETIME_TO_ETIMEDOUT(ret);
				if (ret == ETIMEDOUT &&
				    F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
				    !F_ISSET(dbenv, DB_ENV_FAILCHK))
					ret = USR_ERR(env,
					    __mutex_died(env, mutex));
			} while (ret == ETIMEDOUT);
		} else
#endif
			RET_SET_PTHREAD_LOCK(mutexp, ret);
	}

	PERFMON4(env,
	    mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp);
	COMPQUIET(mutex, 0);
	COMPQUIET(exclusive, 0);
	return (ret);
}

/*
 * __db_pthread_mutex_condwait
 *	Perform a pthread condition wait for a DB_MUTEX.
 *
 *	This will be a timed wait when a timespec has been specified. EINTR and
 *	spurious ETIME* values are mapped to 0, and hence success.  The
 *	mutexp->u.m.mutex must be locked upon entry. When returning a success
 *	or timeout status it will have been locked again.
 *
 *	Returns:
 *		0 if it is safe to retry acquiring the mutex
 *		DB_TIMEOUT if the timeout was exceeded
 *		<other> a fatal error. The mutexp->u.m.mutex has been unlocked.
 */
inline static int
__db_pthread_mutex_condwait(env, mutex, mutexp, timespec)
	ENV *env;
	db_mutex_t mutex;
	DB_MUTEX *mutexp;
	db_timespec *timespec;
{
	DB_ENV *dbenv;
	int ret;
#ifdef HAVE_FAILCHK_BROADCAST
	db_timespec failchk_timespec;
#endif

	dbenv = env->dbenv;
	PERFMON4(env, mutex, suspend, mutex, TRUE, mutexp->alloc_id, mutexp);

#ifdef HAVE_FAILCHK_BROADCAST
	/*
	 * If the failchk timeout would expire sooner than the timeout passed
	 * in as an argument, use the failchk timeout.  The caller handles
	 * "short" waits.
	 */
	if (dbenv->mutex_failchk_timeout != 0) {
		timespecclear(&failchk_timespec);
		__clock_set_expires(env,
		    &failchk_timespec, dbenv->mutex_failchk_timeout);
		if (timespec == NULL ||
		    timespeccmp(timespec, &failchk_timespec, >))
			timespec = &failchk_timespec;
	}
#endif

	if (timespec != NULL) {
		RET_SET(pthread_cond_timedwait(&mutexp->u.m.cond,
		    &mutexp->u.m.mutex, (struct timespec *) timespec), ret);
		ret = ETIME_TO_ETIMEDOUT(ret);
#ifdef HAVE_FAILCHK_BROADCAST
		if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
		    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
			ret = USR_ERR(env, __mutex_died(env, mutex));
			goto err;
		}
#endif
		if (ret == ETIMEDOUT) {
			ret = DB_TIMEOUT;
			goto err;
		}
	} else
		RET_SET(pthread_cond_wait(&mutexp->u.m.cond,
		    &mutexp->u.m.mutex), ret);
#ifdef HAVE_FAILCHK_BROADCAST
	if (ret == 0 && F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
	    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
		ret = USR_ERR(env, __mutex_died(env, mutex));
		goto err;
	}
#endif
	/*
	 * !!!
	 * Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME
	 * -- out of sheer paranoia, check both ETIME and ETIMEDOUT.  We
	 * believe this happens when the application uses SIGALRM for some
	 * purpose, e.g., the C library sleep call, and Solaris delivers the
	 * signal to the wrong LWP.
	 */
	if (ret != 0) {
		if ((ret = ETIME_TO_ETIMEDOUT(ret)) == ETIMEDOUT ||
		    ret == EINTR)
			ret = 0;
		else {
			/* Failure, caller shouldn't condwait again. */
			(void)pthread_mutex_unlock(&mutexp->u.m.mutex);
			(void)MUTEX_ERR(env, mutex, ret);
		}
	}

err:
	PERFMON4(env, mutex, resume, mutex, TRUE, mutexp->alloc_id, mutexp);

	COMPQUIET(mutex, 0);
	COMPQUIET(env, 0);
	return (ret);
}

#ifndef HAVE_MUTEX_HYBRID
/*
 * __db_pthread_mutex_lock
 *	Lock on a mutex, blocking if necessary.
 *	Timeouts are supported only for self-blocking mutexes. When both a
 *	given timeout and a dbenv-wide failchk timeout are specified, the
 *	given timeout takes precedence -- a process failure might not be noticed
 *	for a little while.
 *
 *	Self-blocking shared latches are not supported.
 *
 * PUBLIC: #ifndef HAVE_MUTEX_HYBRID
 * PUBLIC: int __db_pthread_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
 * PUBLIC: #endif
 */
int
__db_pthread_mutex_lock(env, mutex, timeout)
	ENV *env;
	db_mutex_t mutex;
	db_timeout_t timeout;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	db_timeout_t checktimeout;
	db_timespec timespec;
	int ret, t_ret;

	dbenv = env->dbenv;

	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
		return (0);

	t_ret = 0;
	mutexp = MUTEXP_SET(env, mutex);

	CHECK_MTX_THREAD(env, mutexp);

	/*
	 * We want to know which mutexes are contentious, but don't want to
	 * do an interlocked test here -- that's slower when the underlying
	 * system has adaptive mutexes and can perform optimizations like
	 * spinning only if the thread holding the mutex is actually running
	 * on a CPU.  Make a guess, using a normal load instruction.
	 */
	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
		STAT_INC(env, mutex, set_wait, mutexp->mutex_set_wait, mutex);
	else
		STAT_INC(env,
		    mutex, set_nowait, mutexp->mutex_set_nowait, mutex);

	checktimeout = timeout;
#ifdef HAVE_FAILCHK_BROADCAST
	if (checktimeout == 0 || checktimeout > dbenv->mutex_failchk_timeout)
		checktimeout = dbenv->mutex_failchk_timeout;
#endif

	/* Single-thread the next block, except during the possible condwait. */
	if ((ret = __db_pthread_mutex_prep(env, mutex, mutexp, TRUE)) != 0)
		goto err;

	if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) {
		if (checktimeout != 0)
			timespecclear(&timespec);
		while (MUTEXP_IS_BUSY(mutexp)) {
			/* Set expiration timer upon first need. */
			if (checktimeout != 0 && !timespecisset(&timespec))
				__clock_set_expires(env, &timespec, timeout);
			t_ret = __db_pthread_mutex_condwait(env,
			    mutex, mutexp, timeout == 0 ? NULL : &timespec);
			if (t_ret != 0) {
				if (t_ret == DB_TIMEOUT)
					goto out;
				ret = t_ret;
				goto err;
			}
		}

		F_SET(mutexp, DB_MUTEX_LOCKED);
		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
out:
		/* #2471: HP-UX can sporadically return EFAULT. See above */
		RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret);
		if (ret != 0) {
			(void)MUTEX_ERR(env, mutex, ret);
			goto err;
		}
	} else {
#ifdef DIAGNOSTIC
		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
			char buf[DB_THREADID_STRLEN];
			(void)dbenv->thread_id_string(dbenv,
			    mutexp->pid, mutexp->tid, buf);
			ret = MUTEX_ERR(env, mutex, EINVAL);
			__db_errx(env, DB_STR_A("2022",
		    "pthread lock failed: lock currently in use: pid/tid: %s",
			    "%s"), buf);
			goto err;
		}
#endif
		F_SET(mutexp, DB_MUTEX_LOCKED);
		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
	}

#ifdef DIAGNOSTIC
	/*
	 * We want to switch threads as often as possible.  Yield every time
	 * we get a mutex to ensure contention.
	 */
	if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
		__os_yield(env, 0, 0);
#endif
#ifdef MUTEX_DIAG
	if (t_ret == 0) {
		__os_gettime(env, &mutexp->mutex_history.when, 0);
		__os_stack_text(env, mutexp->mutex_history.stacktext,
		    sizeof(mutexp->mutex_history.stacktext), 12, 2);
	}
#endif
	return (t_ret);

err:
	__db_err(env, ret, DB_STR("2023", "pthread lock failed"));
	return (__env_panic(env, ret));
}
#endif

#if defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_HYBRID)
/*
 * __db_pthread_mutex_readlock
 *	Take a shared lock on a mutex, blocking if necessary.
 *
 * PUBLIC: #if defined(HAVE_SHARED_LATCHES)
 * PUBLIC: int __db_pthread_mutex_readlock __P((ENV *, db_mutex_t));
 * PUBLIC: #endif
 */
int
__db_pthread_mutex_readlock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	MUTEX_STATE *state;
#ifdef HAVE_FAILCHK_BROADCAST
	db_timespec timespec;
#endif
	int ret;

	dbenv = env->dbenv;

	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
		return (0);

	mutexp = MUTEXP_SET(env, mutex);
	DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED));

	CHECK_MTX_THREAD(env, mutexp);

	/*
	 * We want to know which mutexes are contentious, but don't want to
	 * do an interlocked test here -- that's slower when the underlying
	 * system has adaptive mutexes and can perform optimizations like
	 * spinning only if the thread holding the mutex is actually running
	 * on a CPU.  Make a guess, using a normal load instruction.
	 */
	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
		STAT_INC(env,
		    mutex, set_rd_wait, mutexp->mutex_set_rd_wait, mutex);
	else
		STAT_INC(env,
		    mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex);

	state = NULL;
	if (env->thr_hashtab != NULL && (ret = __mutex_record_lock(env,
	    mutex, MUTEX_ACTION_INTEND_SHARE, &state)) != 0)
		return (ret);

	PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);

#ifdef HAVE_FAILCHK_BROADCAST
	if (dbenv->mutex_failchk_timeout != 0) {
		do {
			timespecclear(&timespec);
			__clock_set_expires(env,
			    &timespec, dbenv->mutex_failchk_timeout);
			RET_SET(pthread_rwlock_timedrdlock(&mutexp->u.rwlock,
			    (struct timespec *)&timespec), ret);
			ret = ETIME_TO_ETIMEDOUT(ret);
			if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
			    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
				if (ret == 0)
					RETRY_ON_EFAULT(pthread_rwlock_unlock(
					    &mutexp->u.rwlock), ret);
				ret = USR_ERR(env, __mutex_died(env, mutex));
				goto err;
			}
		} while (ret == ETIMEDOUT);
	} else
#endif
		RET_SET(pthread_rwlock_rdlock(&mutexp->u.rwlock), ret);

	PERFMON4(env, mutex, resume, mutex, FALSE, mutexp->alloc_id, mutexp);
	DB_ASSERT(env, !F_ISSET(mutexp, DB_MUTEX_LOCKED));
	if (ret != 0)
		goto err;

#ifdef HAVE_FAILCHK_BROADCAST
	if (F_ISSET(mutexp, DB_MUTEX_OWNER_DEAD) &&
	    !F_ISSET(dbenv, DB_ENV_FAILCHK)) {
		ret = USR_ERR(env, __mutex_died(env, mutex));
		goto err;
	}
#endif
#ifdef MUTEX_DIAG
	__os_gettime(env, &mutexp->mutex_history.when, 0);
	__os_stack_text(env, mutexp->mutex_history.stacktext,
	    sizeof(mutexp->mutex_history.stacktext), 12, 2);
#endif
#ifdef DIAGNOSTIC
	/*
	 * We want to switch threads as often as possible.  Yield every time
	 * we get a mutex to ensure contention.
	 */
	if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
		__os_yield(env, 0, 0);
#endif
	return (0);

err:
	if (state != NULL)
		state->action = MUTEX_ACTION_UNLOCKED;
	__db_err(env, ret, DB_STR("2024", "pthread readlock failed"));
	return (__env_panic(env, ret));
}
#endif

#ifdef HAVE_MUTEX_HYBRID
/*
 * __db_hybrid_mutex_suspend
 *	Suspend this thread, usually until the mutex is free enough to give the
 *	caller a good chance of getting the mutex in the requested exclusivity
 *	mode.  Return early if the timeout is reached or the mutex is found
 *	to be dead.
 *
 *	The major difference between this and the old __db_pthread_mutex_lock()
 *	is the additional 'exclusive' parameter.
 *
 * PUBLIC: #ifdef HAVE_MUTEX_HYBRID
 * PUBLIC: int __db_hybrid_mutex_suspend
 * PUBLIC:	__P((ENV *, db_mutex_t, db_timespec *, int));
 * PUBLIC: #endif
 */
int
__db_hybrid_mutex_suspend(env, mutex, timespec, exclusive)
	ENV *env;
	db_mutex_t mutex;
	db_timespec *timespec;
	int exclusive;
{
	DB_MUTEX *mutexp;
#ifdef HAVE_FAILCHK_BROADCAST
	db_timespec failchk_timespec;
#endif
	int ret, t_ret;

	t_ret = 0;
	mutexp = MUTEXP_SET(env, mutex);

	if (!exclusive)
		DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED));
	DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK));

	if ((ret = __db_pthread_mutex_prep(env, mutex, mutexp, exclusive)) != 0)
		goto err;

	/*
	 * Since this is only for hybrid mutexes, the pthread mutex
	 * is only used to wait after spinning on the TAS mutex.
	 * Set the wait flag before checking to see if the mutex
	 * is still locked.  The holder will clear DB_MUTEX_LOCKED
	 * before checking the wait counter.
	 */
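	/*
	 * A sketch (assumed, not this file's code) of the ordering the
	 * holder's release path follows; the real release lives in the
	 * TAS unlock code and the names here are illustrative:
	 *
	 *	F_CLR(mutexp, DB_MUTEX_LOCKED);	    release the TAS mutex
	 *	MUTEX_MEMBAR(mutexp->flags);	    publish before reading wait
	 *	if (mutexp->wait != 0)		    wake any waiters via the
	 *		wake_waiters(env, mutex);   pthread mutex/cond pair
	 *
	 * Since we increment 'wait' before re-checking the lock (below),
	 * at least one side always observes the other's store.
	 */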
	mutexp->wait++;
	(void)MUTEX_MEMBAR(mutexp->wait);
	while (exclusive ? MUTEXP_IS_BUSY(mutexp) :
	    atomic_read(&mutexp->sharecount) == MUTEX_SHARE_ISEXCLUSIVE) {
		t_ret = __db_pthread_mutex_condwait(env,
		    mutex, mutexp, timespec);
		if (t_ret != 0) {
			if (t_ret == DB_TIMEOUT)
				break;
			ret = t_ret;
			goto err;
		}
		(void)MUTEX_MEMBAR(mutexp->flags);
	}

	mutexp->wait--;

	/* #2471: HP-UX can sporadically return EFAULT. See above */
	RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret);
	if (ret != 0)
		goto err;

	PERFMON4(env,
	    mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp);

#ifdef DIAGNOSTIC
	/*
	 * We want to switch threads as often as possible.  Yield every time
	 * we get a mutex to ensure contention.
	 */
	if (F_ISSET(env->dbenv, DB_ENV_YIELDCPU))
		__os_yield(env, 0, 0);
#endif
	return (t_ret);

err:
	PERFMON4(env,
	    mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp);
	__db_err(env, ret, "pthread suspend failed");
	return (__env_panic(env, ret));
}
#endif

/*
 * __db_pthread_mutex_unlock --
 *	Release a mutex, or, if hybrid, wake a thread up from a suspend.
 *
 * PUBLIC: int __db_pthread_mutex_unlock __P((ENV *, db_mutex_t));
 */
int
__db_pthread_mutex_unlock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	int ret;
#ifndef HAVE_MUTEX_HYBRID
	char description[DB_MUTEX_DESCRIBE_STRLEN];
#endif

	dbenv = env->dbenv;

	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
		return (0);

	mutexp = MUTEXP_SET(env, mutex);

#if !defined(HAVE_MUTEX_HYBRID)
	if (!F_ISSET(mutexp, DB_MUTEX_LOCKED | DB_MUTEX_SHARED)) {
		if (!PANIC_ISSET(env))
			__db_errx(env, DB_STR("2069",
			    "pthread unlock %s: already unlocked"),
			    __mutex_describe(env, mutex, description));
		return (__env_panic(env, EACCES));
	}
#endif
	if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) {
		ret = __db_pthread_mutex_prep(env, mutex, mutexp, TRUE);
		if (ret != 0)
			goto err;

#ifdef HAVE_MUTEX_HYBRID
		STAT_INC(env,
		    mutex, hybrid_wakeup, mutexp->hybrid_wakeup, mutex);
#else
		F_CLR(mutexp, DB_MUTEX_LOCKED);	/* nop if DB_MUTEX_SHARED */
#endif

		if (F_ISSET(mutexp, DB_MUTEX_SHARED))
			RET_SET(
			    pthread_cond_broadcast(&mutexp->u.m.cond), ret);
		else
			RET_SET(pthread_cond_signal(&mutexp->u.m.cond), ret);
		if (ret != 0)
			goto err;
	} else {
#ifndef HAVE_MUTEX_HYBRID

		if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
			F_CLR(mutexp, DB_MUTEX_LOCKED);
		else if (env->thr_hashtab != NULL &&
		    (ret = __mutex_record_unlock(env, mutex)) != 0)
			goto err;
#endif
	}

	/* See comment above; workaround for [#2471]. */
#if defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_HYBRID)
	if (F_ISSET(mutexp, DB_MUTEX_SHARED))
		RETRY_ON_EFAULT(pthread_rwlock_unlock(&mutexp->u.rwlock), ret);
	else
#endif
		RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret);

err:	if (ret != 0) {
		__db_err(env, ret, "pthread unlock failed");
		return (__env_panic(env, ret));
	}
	return (ret);
}

/*
 * __db_pthread_mutex_destroy --
 *	Destroy a mutex.
 *	If it is a native shared latch (not hybrid), destroy only one half
 *	of the rwlock/mutex&cond union, depending on whether it was
 *	allocated as shared.
 *
 * PUBLIC: int __db_pthread_mutex_destroy __P((ENV *, db_mutex_t));
 */
int
__db_pthread_mutex_destroy(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	DB_MUTEX *mutexp;
	DB_THREAD_INFO *ip;
	int ret, t_ret, failchk_thread;

	if (!MUTEX_ON(env))
		return (0);

	mutexp = MUTEXP_SET(env, mutex);

	ret = 0;
	failchk_thread = FALSE;
	/* Get information to determine if we are really the failchk thread. */
	if (F_ISSET(env->dbenv, DB_ENV_FAILCHK)) {
		ret = __env_set_state(env, &ip, THREAD_VERIFY);
		if (ip != NULL && ip->dbth_state == THREAD_FAILCHK)
			failchk_thread = TRUE;
	}

#ifndef HAVE_MUTEX_HYBRID
	if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
#if defined(HAVE_SHARED_LATCHES)
		/*
		 * If there were dead processes waiting on the condition
		 * we may not be able to destroy it.  Let failchk thread skip
		 * this, unless destroy is required.
		 * XXX What operating system resources might this leak?
		 */
#ifdef HAVE_PTHREAD_RWLOCK_REINIT_OKAY
		if (!failchk_thread)
#endif
			RET_SET(
			    pthread_rwlock_destroy(&mutexp->u.rwlock), ret);
		/* For rwlocks, we're done - must not destroy rest of union */
		return (ret);
#endif
	}
#endif
	if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) {
		/*
		 * If there were dead processes waiting on the condition
		 * we may not be able to destroy it.  Let failchk thread
		 * skip this, unless destroy is required.
		 */
#ifdef HAVE_PTHREAD_COND_REINIT_OKAY
		if (!failchk_thread)
#endif
			RET_SET(pthread_cond_destroy(&mutexp->u.m.cond), ret);
		if (ret != 0)
			__db_err(env, ret, DB_STR("2026",
			    "unable to destroy cond"));
	}
	RET_SET(pthread_mutex_destroy(&mutexp->u.m.mutex), t_ret);
	if (t_ret != 0 && !failchk_thread) {
		__db_err(env, t_ret, DB_STR("2027", "unable to destroy mutex"));
		if (ret == 0)
			ret = t_ret;
	}
	return (ret);
}