diff options
Diffstat (limited to 'include/atomic')
-rw-r--r-- | include/atomic/gcc_builtins.h | 5 | ||||
-rw-r--r-- | include/atomic/generic-msvc.h | 116 | ||||
-rw-r--r-- | include/atomic/nolock.h | 49 | ||||
-rw-r--r-- | include/atomic/rwlock.h | 27 | ||||
-rw-r--r-- | include/atomic/solaris.h | 8 | ||||
-rw-r--r-- | include/atomic/x86-gcc.h | 56 | ||||
-rw-r--r-- | include/atomic/x86-msvc.h | 96 |
7 files changed, 207 insertions, 150 deletions
diff --git a/include/atomic/gcc_builtins.h b/include/atomic/gcc_builtins.h index 01ebc38707e..100ff80cacd 100644 --- a/include/atomic/gcc_builtins.h +++ b/include/atomic/gcc_builtins.h @@ -18,7 +18,7 @@ #define make_atomic_add_body(S) \ v= __sync_fetch_and_add(a, v); -#define make_atomic_swap_body(S) \ +#define make_atomic_fas_body(S) \ v= __sync_lock_test_and_set(a, v); #define make_atomic_cas_body(S) \ int ## S sav; \ @@ -28,7 +28,10 @@ #ifdef MY_ATOMIC_MODE_DUMMY #define make_atomic_load_body(S) ret= *a #define make_atomic_store_body(S) *a= v +#define MY_ATOMIC_MODE "gcc-builtins-up" + #else +#define MY_ATOMIC_MODE "gcc-builtins-smp" #define make_atomic_load_body(S) \ ret= __sync_fetch_and_or(a, 0); #define make_atomic_store_body(S) \ diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h new file mode 100644 index 00000000000..f1e1b0e88c9 --- /dev/null +++ b/include/atomic/generic-msvc.h @@ -0,0 +1,116 @@ +/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _atomic_h_cleanup_ +#define _atomic_h_cleanup_ "atomic/generic-msvc.h" + +/* + We don't implement anything specific for MY_ATOMIC_MODE_DUMMY, always use + intrinsics. + 8 and 16-bit atomics are not implemented, but it can be done if necessary. +*/ +#undef MY_ATOMIC_HAS_8_16 + +/* + x86 compilers (both VS2003 or VS2005) never use instrinsics, but generate + function calls to kernel32 instead, even in the optimized build. + We force intrinsics as described in MSDN documentation for + _InterlockedCompareExchange. +*/ +#ifdef _M_IX86 + +#if (_MSC_VER >= 1500) +#include <intrin.h> +#else +C_MODE_START +/*Visual Studio 2003 and earlier do not have prototypes for atomic intrinsics*/ +LONG _InterlockedExchange (LONG volatile *Target,LONG Value); +LONG _InterlockedCompareExchange (LONG volatile *Target, LONG Value, LONG Comp); +LONG _InterlockedExchangeAdd (LONG volatile *Addend, LONG Value); +C_MODE_END + +#pragma intrinsic(_InterlockedExchangeAdd) +#pragma intrinsic(_InterlockedCompareExchange) +#pragma intrinsic(_InterlockedExchange) +#endif + +#define InterlockedExchange _InterlockedExchange +#define InterlockedExchangeAdd _InterlockedExchangeAdd +#define InterlockedCompareExchange _InterlockedCompareExchange +/* + No need to do something special for InterlockedCompareExchangePointer + as it is a #define to InterlockedCompareExchange. The same applies to + InterlockedExchangePointer. +*/ +#endif /*_M_IX86*/ + +#define MY_ATOMIC_MODE "msvc-intrinsics" +#define IL_EXCHG_ADD32(X,Y) InterlockedExchangeAdd((volatile LONG *)(X),(Y)) +#define IL_COMP_EXCHG32(X,Y,Z) InterlockedCompareExchange((volatile LONG *)(X),(Y),(Z)) +#define IL_COMP_EXCHGptr InterlockedCompareExchangePointer +#define IL_EXCHG32(X,Y) InterlockedExchange((volatile LONG *)(X),(Y)) +#define IL_EXCHGptr InterlockedExchangePointer +#define make_atomic_add_body(S) \ + v= IL_EXCHG_ADD ## S (a, v) +#define make_atomic_cas_body(S) \ + int ## S initial_cmp= *cmp; \ + int ## S initial_a= IL_COMP_EXCHG ## S (a, set, initial_cmp); \ + if (!(ret= (initial_a == initial_cmp))) *cmp= initial_a; +#define make_atomic_swap_body(S) \ + v= IL_EXCHG ## S (a, v) +#define make_atomic_load_body(S) \ + ret= 0; /* avoid compiler warning */ \ + ret= IL_COMP_EXCHG ## S (a, ret, ret); + +/* + my_yield_processor (equivalent of x86 PAUSE instruction) should be used + to improve performance on hyperthreaded CPUs. Intel recommends to use it in + spin loops also on non-HT machines to reduce power consumption (see e.g + http://softwarecommunity.intel.com/articles/eng/2004.htm) + + Running benchmarks for spinlocks implemented with InterlockedCompareExchange + and YieldProcessor shows that much better performance is achieved by calling + YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting + loop count in the range 200-300 brought best results. + */ +#ifndef YIELD_LOOPS +#define YIELD_LOOPS 200 +#endif + +static __inline int my_yield_processor() +{ + int i; + for(i=0; i<YIELD_LOOPS; i++) + { +#if (_MSC_VER <= 1310) + /* On older compilers YieldProcessor is not available, use inline assembly*/ + __asm { rep nop } +#else + YieldProcessor(); +#endif + } + return 1; +} + +#define LF_BACKOFF my_yield_processor() +#else /* cleanup */ + +#undef IL_EXCHG_ADD32 +#undef IL_COMP_EXCHG32 +#undef IL_COMP_EXCHGptr +#undef IL_EXCHG32 +#undef IL_EXCHGptr + +#endif diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index d7d87167ade..e4cd9ab9896 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -16,43 +16,36 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#if defined(__i386__) || defined(_M_IX86) || defined(HAVE_GCC_ATOMIC_BUILTINS) - -#ifdef MY_ATOMIC_MODE_DUMMY -# define LOCK "" -#else -# define LOCK "lock" -#endif - -#ifdef HAVE_GCC_ATOMIC_BUILTINS -#include "gcc_builtins.h" -#elif __GNUC__ -#include "x86-gcc.h" -#elif defined(_MSC_VER) -#include "x86-msvc.h" -#endif - +#if defined(__i386__) || defined(_MSC_VER) || defined(__x86_64__) \ + || defined(HAVE_GCC_ATOMIC_BUILTINS) + +# ifdef MY_ATOMIC_MODE_DUMMY +# define LOCK_prefix "" +# else +# define LOCK_prefix "lock" +# endif + +# ifdef HAVE_GCC_ATOMIC_BUILTINS +# include "gcc_builtins.h" +# elif __GNUC__ +# include "x86-gcc.h" +# elif defined(_MSC_VER) +# include "generic-msvc.h" +# endif #elif defined(HAVE_SOLARIS_ATOMIC) - #include "solaris.h" - -#endif /* __i386__ || _M_IX86 || HAVE_GCC_ATOMIC_BUILTINS */ +#endif #if defined(make_atomic_cas_body) || defined(MY_ATOMICS_MADE) /* * We have atomics that require no locking */ #define MY_ATOMIC_NOLOCK - -#ifdef __SUNPRO_C /* - * Sun Studio 12 (and likely earlier) does not accept a typedef struct {} - */ -typedef char my_atomic_rwlock_t; -#else -typedef struct { } my_atomic_rwlock_t; -#endif - + Type not used so minimal size (emptry struct has different size between C + and C++, zero-length array is gcc-specific). +*/ +typedef char my_atomic_rwlock_t __attribute__ ((unused)); #define my_atomic_rwlock_destroy(name) #define my_atomic_rwlock_init(name) #define my_atomic_rwlock_rdlock(name) diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h index 0ff4d16c545..29e22fcb3d5 100644 --- a/include/atomic/rwlock.h +++ b/include/atomic/rwlock.h @@ -16,7 +16,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; +typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t; +#define MY_ATOMIC_MODE_RWLOCKS 1 #ifdef MY_ATOMIC_MODE_DUMMY /* @@ -34,17 +35,25 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; #define my_atomic_rwlock_wrunlock(name) #define MY_ATOMIC_MODE "dummy (non-atomic)" #else -#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw) -#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0) -#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw) -#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw) -#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define MY_ATOMIC_MODE "rwlocks" +/* + we're using read-write lock macros but map them to mutex locks, and they're + faster. Still, having semantically rich API we can change the + underlying implementation, if necessary. +*/ +#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw) +#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0) +#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw) +#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw) +#define MY_ATOMIC_MODE "mutex" +#ifndef MY_ATOMIC_MODE_RWLOCKS +#define MY_ATOMIC_MODE_RWLOCKS 1 +#endif #endif #define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav; -#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav; +#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav; #define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a; #define make_atomic_load_body(S) ret= *a; #define make_atomic_store_body(S) *a= v; diff --git a/include/atomic/solaris.h b/include/atomic/solaris.h index 4c51253d2d5..45efd9faaba 100644 --- a/include/atomic/solaris.h +++ b/include/atomic/solaris.h @@ -186,25 +186,25 @@ my_atomic_storeptr(void * volatile *a, void *v) /* ------------------------------------------------------------------------ */ STATIC_INLINE int8 -my_atomic_swap8(int8 volatile *a, int8 v) +my_atomic_fas8(int8 volatile *a, int8 v) { return ((int8) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v)); } STATIC_INLINE int16 -my_atomic_swap16(int16 volatile *a, int16 v) +my_atomic_fas16(int16 volatile *a, int16 v) { return ((int16) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v)); } STATIC_INLINE int32 -my_atomic_swap32(int32 volatile *a, int32 v) +my_atomic_fas32(int32 volatile *a, int32 v) { return ((int32) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v)); } STATIC_INLINE void * -my_atomic_swapptr(void * volatile *a, void *v) +my_atomic_fasptr(void * volatile *a, void *v) { return (atomic_swap_ptr(a, v)); } diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h index c3029f9c1b4..cbdb2d11e68 100644 --- a/include/atomic/x86-gcc.h +++ b/include/atomic/x86-gcc.h @@ -22,10 +22,18 @@ architectures support double-word (128-bit) cas. */ -#ifdef MY_ATOMIC_NO_XADD -#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd" +#ifdef __x86_64__ +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix +# endif #else -#define MY_ATOMIC_MODE "gcc-x86" LOCK +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix +# endif #endif /* fix -ansi errors while maintaining readability */ @@ -34,29 +42,53 @@ #endif #ifndef MY_ATOMIC_NO_XADD -#define make_atomic_add_body(S) \ - asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) +#define make_atomic_add_body(S) make_atomic_add_body ## S +#define make_atomic_cas_body(S) make_atomic_cas_body ## S #endif -#define make_atomic_swap_body(S) \ - asm volatile ("; xchg %0, %1;" : "+q" (v) , "+m" (*a)) -#define make_atomic_cas_body(S) \ - asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \ + +#define make_atomic_add_body32 \ + asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) + +#define make_atomic_cas_body32 \ + asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \ : "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set)) +#define make_atomic_cas_bodyptr make_atomic_cas_body32 + +#ifndef __x86_64__ +#define make_atomic_add_body64 make_atomic_add_body32 +#define make_atomic_cas_body64 make_atomic_cas_body32 +#else +#define make_atomic_add_body64 \ + int64 tmp=*a; \ + while (!my_atomic_cas64(a, &tmp, tmp+v)); \ + v=tmp; +#define make_atomic_cas_body64 \ + int32 ebx=(set & 0xFFFFFFFF), ecx=(set >> 32); \ + asm volatile (LOCK_prefix "; cmpxchg8b %0; setz %2;" \ + : "+m" (*a), "+A" (*cmp), "=q" (ret) \ + :"b" (ebx), "c" (ecx)) +#endif + +#define make_atomic_fas_body(S) \ + asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a)) + #ifdef MY_ATOMIC_MODE_DUMMY #define make_atomic_load_body(S) ret=*a #define make_atomic_store_body(S) *a=v #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ ret=0; \ - asm volatile (LOCK "; cmpxchg %2, %0" \ + asm volatile (LOCK_prefix "; cmpxchg %2, %0" \ : "+m" (*a), "+a" (ret): "r" (ret)) #define make_atomic_store_body(S) \ - asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v)) + asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v)) #endif +/* TODO test on intel whether the below helps. on AMD it makes no difference */ +//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; }) #endif /* ATOMIC_X86_GCC_INCLUDED */ diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h deleted file mode 100644 index c4885bb8451..00000000000 --- a/include/atomic/x86-msvc.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (C) 2006 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - XXX 64-bit atomic operations can be implemented using - cmpxchg8b, if necessary -*/ - -// Would it be better to use intrinsics ? -// (InterlockedCompareExchange, InterlockedCompareExchange16 -// InterlockedExchangeAdd, InterlockedExchange) - -#ifndef _atomic_h_cleanup_ -#define _atomic_h_cleanup_ "atomic/x86-msvc.h" - -#define MY_ATOMIC_MODE "msvc-x86" LOCK - -#define make_atomic_add_body(S) \ - _asm { \ - _asm mov reg_ ## S, v \ - _asm LOCK xadd *a, reg_ ## S \ - _asm movzx v, reg_ ## S \ - } -#define make_atomic_cas_body(S) \ - _asm { \ - _asm mov areg_ ## S, *cmp \ - _asm mov reg2_ ## S, set \ - _asm LOCK cmpxchg *a, reg2_ ## S \ - _asm mov *cmp, areg_ ## S \ - _asm setz al \ - _asm movzx ret, al \ - } -#define make_atomic_swap_body(S) \ - _asm { \ - _asm mov reg_ ## S, v \ - _asm xchg *a, reg_ ## S \ - _asm mov v, reg_ ## S \ - } - -#ifdef MY_ATOMIC_MODE_DUMMY -#define make_atomic_load_body(S) ret=*a -#define make_atomic_store_body(S) *a=v -#else -/* - Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers -*/ -#define make_atomic_load_body(S) \ - _asm { \ - _asm mov areg_ ## S, 0 \ - _asm mov reg2_ ## S, areg_ ## S \ - _asm LOCK cmpxchg *a, reg2_ ## S \ - _asm mov ret, areg_ ## S \ - } -#define make_atomic_store_body(S) \ - _asm { \ - _asm mov reg_ ## S, v \ - _asm xchg *a, reg_ ## S \ - } -#endif - -#define reg_8 al -#define reg_16 ax -#define reg_32 eax -#define areg_8 al -#define areg_16 ax -#define areg_32 eax -#define reg2_8 bl -#define reg2_16 bx -#define reg2_32 ebx - -#else /* cleanup */ - -#undef reg_8 -#undef reg_16 -#undef reg_32 -#undef areg_8 -#undef areg_16 -#undef areg_32 -#undef reg2_8 -#undef reg2_16 -#undef reg2_32 -#endif - |