summaryrefslogtreecommitdiff
path: root/src/include/storage/s_lock.h
blob: 895abe672b836bc2ef15d5ca9bac1820ebeb7f4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
/*-------------------------------------------------------------------------
 *
 * s_lock.h
 *	   Hardware-dependent implementation of spinlocks.
 *
 *	NOTE: none of the macros in this file are intended to be called directly.
 *	Call them through the hardware-independent macros in spin.h.
 *
 *	The following hardware-dependent macros must be provided for each
 *	supported platform:
 *
 *	void S_INIT_LOCK(slock_t *lock)
 *		Initialize a spinlock (to the unlocked state).
 *
 *	int S_LOCK(slock_t *lock)
 *		Acquire a spinlock, waiting if necessary.
 *		Time out and abort() if unable to acquire the lock in a
 *		"reasonable" amount of time --- typically ~ 1 minute.
 *		Should return number of "delays"; see s_lock.c
 *
 *	void S_UNLOCK(slock_t *lock)
 *		Unlock a previously acquired lock.
 *
 *	bool S_LOCK_FREE(slock_t *lock)
 *		Tests if the lock is free. Returns TRUE if free, FALSE if locked.
 *		This does *not* change the state of the lock.
 *
 *	void SPIN_DELAY(void)
 *		Delay operation to occur inside spinlock wait loop.
 *
 *	Note to implementors: there are default implementations for all these
 *	macros at the bottom of the file.  Check if your platform can use
 *	these or needs to override them.
 *
 *  Usually, S_LOCK() is implemented in terms of even lower-level macros
 *	TAS() and TAS_SPIN():
 *
 *	int TAS(slock_t *lock)
 *		Atomic test-and-set instruction.  Attempt to acquire the lock,
 *		but do *not* wait.	Returns 0 if successful, nonzero if unable
 *		to acquire the lock.
 *
 *	int TAS_SPIN(slock_t *lock)
 *		Like TAS(), but this version is used when waiting for a lock
 *		previously found to be contended.  By default, this is the
 *		same as TAS(), but on some architectures it's better to poll a
 *		contended lock using an unlocked instruction and retry the
 *		atomic test-and-set only when it appears free.
 *
 *	TAS() and TAS_SPIN() are NOT part of the API, and should never be called
 *	directly.
 *
 *	CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
 *	failure to acquire a lock even when the lock is not locked.  For example,
 *	on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
 *	always be used, even if you are certain the lock is free.
 *
 *	Another caution for users of these macros is that it is the caller's
 *	responsibility to ensure that the compiler doesn't re-order accesses
 *	to shared memory to precede the actual lock acquisition, or follow the
 *	lock release.  Typically we handle this by using volatile-qualified
 *	pointers to refer to both the spinlock itself and the shared data
 *	structure being accessed within the spinlocked critical section.
 *	That fixes it because compilers are not allowed to re-order accesses
 *	to volatile objects relative to other such accesses.
 *
 *	On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
 *	S_UNLOCK() macros must further include hardware-level memory fence
 *	instructions to prevent similar re-ordering at the hardware level.
 *	TAS() and TAS_SPIN() must guarantee that loads and stores issued after
 *	the macro are not executed until the lock has been obtained.  Conversely,
 *	S_UNLOCK() must guarantee that loads and stores issued before the macro
 *	have been executed before the lock is released.
 *
 *	On most supported platforms, TAS() uses a tas() function written
 *	in assembly language to execute a hardware atomic-test-and-set
 *	instruction.  Equivalent OS-supplied mutex routines could be used too.
 *
 *	If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not
 *	defined), then we fall back on an emulation that uses SysV semaphores
 *	(see spin.c).  This emulation will be MUCH MUCH slower than a proper TAS()
 *	implementation, because of the cost of a kernel call per lock or unlock.
 *	An old report is that Postgres spends around 40% of its time in semop(2)
 *	when using the SysV semaphore code.
 *
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *	  src/include/storage/s_lock.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef S_LOCK_H
#define S_LOCK_H

#ifdef HAVE_SPINLOCKS	/* skip spinlocks if requested */

#if defined(__GNUC__) || defined(__INTEL_COMPILER)
/*************************************************************************
 * All the gcc inlines
 * Gcc consistently defines the CPU as __cpu__.
 * Other compilers use __cpu or __cpu__ so we test for both in those cases.
 */

/*----------
 * Standard gcc asm format (assuming "volatile slock_t *lock"):

	__asm__ __volatile__(
		"	instruction	\n"
		"	instruction	\n"
		"	instruction	\n"
:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
:		"r"(lock)					// lock pointer, in input register
:		"memory", "cc");			// show clobbered registers here

 * The output-operands list (after first colon) should always include
 * "+m"(*lock), whether or not the asm code actually refers to this
 * operand directly.  This ensures that gcc believes the value in the
 * lock variable is used and set by the asm code.  Also, the clobbers
 * list (after third colon) should always include "memory"; this prevents
 * gcc from thinking it can cache the values of shared-memory fields
 * across the asm code.  Add "cc" if your asm code changes the condition
 * code register, and also list any temp registers the code uses.
 *----------
 */


#ifdef __i386__		/* 32-bit i386 */
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock) tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	register slock_t _res = 1;

	/*
	 * Use a non-locking test before asserting the bus lock.  Note that the
	 * extra test appears to be a small loss on some x86 platforms and a small
	 * win on others; it's by no means clear that we should keep it.
	 *
	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
	 * macros.  Nowadays it probably would be better to do a non-locking test
	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
	 * testing to verify that.  Without some empirical evidence, better to
	 * leave it alone.
	 */
	__asm__ __volatile__(
		"	cmpb	$0,%1	\n"
		"	jne		1f		\n"
		"	lock			\n"
		"	xchgb	%0,%1	\n"
		"1: \n"
:		"+q"(_res), "+m"(*lock)
:
:		"memory", "cc");
	return (int) _res;
}

#define SPIN_DELAY() spin_delay()

static __inline__ void
spin_delay(void)
{
	/*
	 * This sequence is equivalent to the PAUSE instruction ("rep" is
	 * ignored by old IA32 processors if the following instruction is
	 * not a string operation); the IA-32 Architecture Software
	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
	 * PAUSE in the inner loop of a spin lock is necessary for good
	 * performance:
	 *
	 *     The PAUSE instruction improves the performance of IA-32
	 *     processors supporting Hyper-Threading Technology when
	 *     executing spin-wait loops and other routines where one
	 *     thread is accessing a shared lock or semaphore in a tight
	 *     polling loop. When executing a spin-wait loop, the
	 *     processor can suffer a severe performance penalty when
	 *     exiting the loop because it detects a possible memory order
	 *     violation and flushes the core processor's pipeline. The
	 *     PAUSE instruction provides a hint to the processor that the
	 *     code sequence is a spin-wait loop. The processor uses this
	 *     hint to avoid the memory order violation and prevent the
	 *     pipeline flush. In addition, the PAUSE instruction
	 *     de-pipelines the spin-wait loop to prevent it from
	 *     consuming execution resources excessively.
	 */
	__asm__ __volatile__(
		" rep; nop			\n");
}

#endif	 /* __i386__ */


#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock) tas(lock)

/*
 * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
 * but only when spinning.
 *
 * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
 * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
 * available at:
 * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
 */
#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))

static __inline__ int
tas(volatile slock_t *lock)
{
	register slock_t _res = 1;

	__asm__ __volatile__(
		"	lock			\n"
		"	xchgb	%0,%1	\n"
:		"+q"(_res), "+m"(*lock)
:
:		"memory", "cc");
	return (int) _res;
}

#define SPIN_DELAY() spin_delay()

static __inline__ void
spin_delay(void)
{
	/*
	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
	 * Opteron, but it may be of some use on EM64T, so we keep it.
	 */
	__asm__ __volatile__(
		" rep; nop			\n");
}

#endif	 /* __x86_64__ */


#if defined(__ia64__) || defined(__ia64)
/*
 * Intel Itanium, gcc or Intel's compiler.
 *
 * Itanium has weak memory ordering, but we rely on the compiler to enforce
 * strict ordering of accesses to volatile data.  In particular, while the
 * xchg instruction implicitly acts as a memory barrier with 'acquire'
 * semantics, we do not have an explicit memory fence instruction in the
 * S_UNLOCK macro.  We use a regular assignment to clear the spinlock, and
 * trust that the compiler marks the generated store instruction with the
 * ".rel" opcode.
 *
 * Testing shows that assumption to hold on gcc, although I could not find
 * any explicit statement on that in the gcc manual.  In Intel's compiler,
 * the -m[no-]serialize-volatile option controls that, and testing shows that
 * it is enabled by default.
 */
#define HAS_TEST_AND_SET

typedef unsigned int slock_t;

#define TAS(lock) tas(lock)

/* On IA64, it's a win to use a non-locking test before the xchg proper */
#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))

#ifndef __INTEL_COMPILER

static __inline__ int
tas(volatile slock_t *lock)
{
	long int	ret;

	__asm__ __volatile__(
		"	xchg4 	%0=%1,%2	\n"
:		"=r"(ret), "+m"(*lock)
:		"r"(1)
:		"memory");
	return (int) ret;
}

#else /* __INTEL_COMPILER */

static __inline__ int
tas(volatile slock_t *lock)
{
	int		ret;

	ret = _InterlockedExchange(lock,1);	/* this is a xchg asm macro */

	return ret;
}

#endif /* __INTEL_COMPILER */
#endif	 /* __ia64__ || __ia64 */


/*
 * On ARM, we use __sync_lock_test_and_set(int *, int) if available, and if
 * not fall back on the SWPB instruction.  SWPB does not work on ARMv6 or
 * later, so the compiler builtin is preferred if available.  Note also that
 * the int-width variant of the builtin works on more chips than other widths.
 */
#if defined(__arm__) || defined(__arm)
#define HAS_TEST_AND_SET

#define TAS(lock) tas(lock)

#ifdef HAVE_GCC_INT_ATOMICS

typedef int slock_t;

static __inline__ int
tas(volatile slock_t *lock)
{
	return __sync_lock_test_and_set(lock, 1);
}

#define S_UNLOCK(lock) __sync_lock_release(lock)

#else /* !HAVE_GCC_INT_ATOMICS */

typedef unsigned char slock_t;

static __inline__ int
tas(volatile slock_t *lock)
{
	register slock_t _res = 1;

	__asm__ __volatile__(
		"	swpb 	%0, %0, [%2]	\n"
:		"+r"(_res), "+m"(*lock)
:		"r"(lock)
:		"memory");
	return (int) _res;
}

#endif	 /* HAVE_GCC_INT_ATOMICS */
#endif	 /* __arm__ */


/*
 * On ARM64, we use __sync_lock_test_and_set(int *, int) if available.
 */
#if defined(__aarch64__) || defined(__aarch64)
#ifdef HAVE_GCC_INT_ATOMICS
#define HAS_TEST_AND_SET

#define TAS(lock) tas(lock)

typedef int slock_t;

static __inline__ int
tas(volatile slock_t *lock)
{
	return __sync_lock_test_and_set(lock, 1);
}

#define S_UNLOCK(lock) __sync_lock_release(lock)

#endif	 /* HAVE_GCC_INT_ATOMICS */
#endif	 /* __aarch64__ */


/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
#if defined(__s390__) || defined(__s390x__)
#define HAS_TEST_AND_SET

typedef unsigned int slock_t;

#define TAS(lock)	   tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	int			_res = 0;

	__asm__	__volatile__(
		"	cs 	%0,%3,0(%2)		\n"
:		"+d"(_res), "+m"(*lock)
:		"a"(lock), "d"(1)
:		"memory", "cc");
	return _res;
}

#endif	 /* __s390__ || __s390x__ */


#if defined(__sparc__)		/* Sparc */
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock) tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	register slock_t _res;

	/*
	 *	See comment in /pg/backend/port/tas/solaris_sparc.s for why this
	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
	 */
	__asm__ __volatile__(
		"	ldstub	[%2], %0	\n"
:		"=r"(_res), "+m"(*lock)
:		"r"(lock)
:		"memory");
	return (int) _res;
}

#endif	 /* __sparc__ */


/* PowerPC */
#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
#define HAS_TEST_AND_SET

typedef unsigned int slock_t;

#define TAS(lock) tas(lock)

/* On PPC, it's a win to use a non-locking test before the lwarx */
#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))

/*
 * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
 * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
 * On newer machines, we can use lwsync instead for better performance.
 */
static __inline__ int
tas(volatile slock_t *lock)
{
	slock_t _t;
	int _res;

	__asm__ __volatile__(
#ifdef USE_PPC_LWARX_MUTEX_HINT
"	lwarx   %0,0,%3,1	\n"
#else
"	lwarx   %0,0,%3		\n"
#endif
"	cmpwi   %0,0		\n"
"	bne     1f			\n"
"	addi    %0,%0,1		\n"
"	stwcx.  %0,0,%3		\n"
"	beq     2f         	\n"
"1:	li      %1,1		\n"
"	b		3f			\n"
"2:						\n"
#ifdef USE_PPC_LWSYNC
"	lwsync				\n"
#else
"	isync				\n"
#endif
"	li      %1,0		\n"
"3:						\n"

:	"=&r"(_t), "=r"(_res), "+m"(*lock)
:	"r"(lock)
:	"memory", "cc");
	return _res;
}

/*
 * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
 * On newer machines, we can use lwsync instead for better performance.
 */
#ifdef USE_PPC_LWSYNC
#define S_UNLOCK(lock)	\
do \
{ \
	__asm__ __volatile__ ("	lwsync \n"); \
	*((volatile slock_t *) (lock)) = 0; \
} while (0)
#else
#define S_UNLOCK(lock)	\
do \
{ \
	__asm__ __volatile__ ("	sync \n"); \
	*((volatile slock_t *) (lock)) = 0; \
} while (0)
#endif /* USE_PPC_LWSYNC */

#endif /* powerpc */


/* Linux Motorola 68k */
#if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__)
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock) tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	register int rv;

	__asm__	__volatile__(
		"	clrl	%0		\n"
		"	tas		%1		\n"
		"	sne		%0		\n"
:		"=d"(rv), "+m"(*lock)
:
:		"memory", "cc");
	return rv;
}

#endif	 /* (__mc68000__ || __m68k__) && __linux__ */


/*
 * VAXen -- even multiprocessor ones
 * (thanks to Tom Ivar Helbekkmo)
 */
#if defined(__vax__)
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock) tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	register int	_res;

	__asm__ __volatile__(
		"	movl 	$1, %0			\n"
		"	bbssi	$0, (%2), 1f	\n"
		"	clrl	%0				\n"
		"1: \n"
:		"=&r"(_res), "+m"(*lock)
:		"r"(lock)
:		"memory");
	return _res;
}

#endif	 /* __vax__ */


#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
/* Note: on SGI we use the OS' mutex ABI, see below */
/* Note: R10000 processors require a separate SYNC */
#define HAS_TEST_AND_SET

typedef unsigned int slock_t;

#define TAS(lock) tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	register volatile slock_t *_l = lock;
	register int _res;
	register int _tmp;

	__asm__ __volatile__(
		"       .set push           \n"
		"       .set mips2          \n"
		"       .set noreorder      \n"
		"       .set nomacro        \n"
		"       ll      %0, %2      \n"
		"       or      %1, %0, 1   \n"
		"       sc      %1, %2      \n"
		"       xori    %1, 1       \n"
		"       or      %0, %0, %1  \n"
		"       sync                \n"
		"       .set pop              "
:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
:
:		"memory");
	return _res;
}

/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
#define S_UNLOCK(lock)	\
do \
{ \
	__asm__ __volatile__( \
		"       .set push           \n" \
		"       .set mips2          \n" \
		"       .set noreorder      \n" \
		"       .set nomacro        \n" \
		"       sync                \n" \
		"       .set pop              "); \
	*((volatile slock_t *) (lock)) = 0; \
} while (0)

#endif /* __mips__ && !__sgi */


#if defined(__m32r__) && defined(HAVE_SYS_TAS_H)	/* Renesas' M32R */
#define HAS_TEST_AND_SET

#include <sys/tas.h>

typedef int slock_t;

#define TAS(lock) tas(lock)

#endif /* __m32r__ */


#if defined(__sh__)				/* Renesas' SuperH */
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock) tas(lock)

static __inline__ int
tas(volatile slock_t *lock)
{
	register int _res;

	/*
	 * This asm is coded as if %0 could be any register, but actually SuperH
	 * restricts the target of xor-immediate to be R0.  That's handled by
	 * the "z" constraint on _res.
	 */
	__asm__ __volatile__(
		"	tas.b @%2    \n"
		"	movt  %0     \n"
		"	xor   #1,%0  \n"
:		"=z"(_res), "+m"(*lock)
:		"r"(lock)
:		"memory", "t");
	return _res;
}

#endif	 /* __sh__ */


/* These live in s_lock.c, but only for gcc */


#if defined(__m68k__) && !defined(__linux__)	/* non-Linux Motorola 68k */
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;
#endif


#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */



/*
 * ---------------------------------------------------------------------
 * Platforms that use non-gcc inline assembly:
 * ---------------------------------------------------------------------
 */

#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */


#if defined(USE_UNIVEL_CC)		/* Unixware compiler */
#define HAS_TEST_AND_SET

typedef unsigned char slock_t;

#define TAS(lock)	tas(lock)

asm int
tas(volatile slock_t *s_lock)
{
/* UNIVEL wants %mem in column 1, so we don't pg_indent this file */
%mem s_lock
	pushl %ebx
	movl s_lock, %ebx
	movl $255, %eax
	lock
	xchgb %al, (%ebx)
	popl %ebx
}

#endif	 /* defined(USE_UNIVEL_CC) */


#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
/*
 * HP's PA-RISC
 *
 * See src/backend/port/hpux/tas.c.template for details about LDCWX.  Because
 * LDCWX requires a 16-byte-aligned address, we declare slock_t as a 16-byte
 * struct.  The active word in the struct is whichever has the aligned address;
 * the other three words just sit at -1.
 *
 * When using gcc, we can inline the required assembly code.
 */
#define HAS_TEST_AND_SET

typedef struct
{
	int			sema[4];
} slock_t;

#define TAS_ACTIVE_WORD(lock)	((volatile int *) (((uintptr_t) (lock) + 15) & ~15))

#if defined(__GNUC__)

static __inline__ int
tas(volatile slock_t *lock)
{
	volatile int *lockword = TAS_ACTIVE_WORD(lock);
	register int lockval;

	__asm__ __volatile__(
		"	ldcwx	0(0,%2),%0	\n"
:		"=r"(lockval), "+m"(*lockword)
:		"r"(lockword)
:		"memory");
	return (lockval == 0);
}

#endif /* __GNUC__ */

#define S_UNLOCK(lock)	(*TAS_ACTIVE_WORD(lock) = -1)

#define S_INIT_LOCK(lock) \
	do { \
		volatile slock_t *lock_ = (lock); \
		lock_->sema[0] = -1; \
		lock_->sema[1] = -1; \
		lock_->sema[2] = -1; \
		lock_->sema[3] = -1; \
	} while (0)

#define S_LOCK_FREE(lock)	(*TAS_ACTIVE_WORD(lock) != 0)

#endif	 /* __hppa || __hppa__ */


#if defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
/*
 * HP-UX on Itanium, non-gcc compiler
 *
 * We assume that the compiler enforces strict ordering of loads/stores on
 * volatile data (see comments on the gcc-version earlier in this file).
 * Note that this assumption does *not* hold if you use the
 * +Ovolatile=__unordered option on the HP-UX compiler, so don't do that.
 *
 * See also Implementing Spinlocks on the Intel Itanium Architecture and
 * PA-RISC, by Tor Ekqvist and David Graves, for more information.  As of
 * this writing, version 1.0 of the manual is available at:
 * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
 */
#define HAS_TEST_AND_SET

typedef unsigned int slock_t;

#include <ia64/sys/inline.h>
#define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
/* On IA64, it's a win to use a non-locking test before the xchg proper */
#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))

#endif	/* HPUX on IA64, non gcc */

#if defined(_AIX)	/* AIX */
/*
 * AIX (POWER)
 */
#define HAS_TEST_AND_SET

#include <sys/atomic_op.h>

typedef int slock_t;

#define TAS(lock)			_check_lock((slock_t *) (lock), 0, 1)
#define S_UNLOCK(lock)		_clear_lock((slock_t *) (lock), 0)
#endif	 /* _AIX */


/* These are in s_lock.c */

#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
#define HAS_TEST_AND_SET

#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
typedef unsigned int slock_t;
#else
typedef unsigned char slock_t;
#endif

extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
									  slock_t cmp);

#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
#endif


#ifdef WIN32_ONLY_COMPILER
typedef LONG slock_t;

#define HAS_TEST_AND_SET
#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))

#define SPIN_DELAY() spin_delay()

/* If using Visual C++ on Win64, inline assembly is unavailable.
 * Use a _mm_pause instrinsic instead of rep nop.
 */
#if defined(_WIN64)
static __forceinline void
spin_delay(void)
{
	_mm_pause();
}
#else
static __forceinline void
spin_delay(void)
{
	/* See comment for gcc code. Same code, MASM syntax */
	__asm rep nop;
}
#endif

#endif


#endif	/* !defined(HAS_TEST_AND_SET) */


/* Blow up if we didn't have any way to do spinlocks */
#ifndef HAS_TEST_AND_SET
#error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@postgresql.org.
#endif


#else	/* !HAVE_SPINLOCKS */


/*
 * Fake spinlock implementation using semaphores --- slow and prone
 * to fall foul of kernel limits on number of semaphores, so don't use this
 * unless you must!  The subroutines appear in spin.c.
 */
typedef int slock_t;

extern bool s_lock_free_sema(volatile slock_t *lock);
extern void s_unlock_sema(volatile slock_t *lock);
extern void s_init_lock_sema(volatile slock_t *lock);
extern int	tas_sema(volatile slock_t *lock);

#define S_LOCK_FREE(lock)	s_lock_free_sema(lock)
#define S_UNLOCK(lock)	 s_unlock_sema(lock)
#define S_INIT_LOCK(lock)	s_init_lock_sema(lock)
#define TAS(lock)	tas_sema(lock)


#endif	/* HAVE_SPINLOCKS */


/*
 * Default Definitions - override these above as needed.
 */

#if !defined(S_LOCK)
#define S_LOCK(lock) \
	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__) : 0)
#endif	 /* S_LOCK */

#if !defined(S_LOCK_FREE)
#define S_LOCK_FREE(lock)	(*(lock) == 0)
#endif	 /* S_LOCK_FREE */

#if !defined(S_UNLOCK)
#define S_UNLOCK(lock)		(*((volatile slock_t *) (lock)) = 0)
#endif	 /* S_UNLOCK */

#if !defined(S_INIT_LOCK)
#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
#endif	 /* S_INIT_LOCK */

#if !defined(SPIN_DELAY)
#define SPIN_DELAY()	((void) 0)
#endif	 /* SPIN_DELAY */

#if !defined(TAS)
extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
												 * s_lock.c */

#define TAS(lock)		tas(lock)
#endif	 /* TAS */

#if !defined(TAS_SPIN)
#define TAS_SPIN(lock)	TAS(lock)
#endif	 /* TAS_SPIN */


/*
 * Platform-independent out-of-line support routines
 */
extern int s_lock(volatile slock_t *lock, const char *file, int line);

/* Support for dynamic adjustment of spins_per_delay */
#define DEFAULT_SPINS_PER_DELAY  100

extern void set_spins_per_delay(int shared_spins_per_delay);
extern int	update_spins_per_delay(int shared_spins_per_delay);

#endif	 /* S_LOCK_H */