Diffstat (limited to 'include/atomic')
-rw-r--r--  include/atomic/gcc_builtins.h    5
-rw-r--r--  include/atomic/generic-msvc.h  134
-rw-r--r--  include/atomic/nolock.h         69
-rw-r--r--  include/atomic/rwlock.h         70
-rw-r--r--  include/atomic/solaris.h       206
-rw-r--r--  include/atomic/x86-gcc.h       104
-rw-r--r--  include/atomic/x86-msvc.h       96
7 files changed, 348 insertions, 336 deletions
diff --git a/include/atomic/gcc_builtins.h b/include/atomic/gcc_builtins.h
index 01ebc38707e..100ff80cacd 100644
--- a/include/atomic/gcc_builtins.h
+++ b/include/atomic/gcc_builtins.h
@@ -18,7 +18,7 @@
 #define make_atomic_add_body(S) \
   v= __sync_fetch_and_add(a, v);
-#define make_atomic_swap_body(S) \
+#define make_atomic_fas_body(S) \
   v= __sync_lock_test_and_set(a, v);
 #define make_atomic_cas_body(S) \
   int ## S sav; \
@@ -28,7 +28,10 @@
 #ifdef MY_ATOMIC_MODE_DUMMY
 #define make_atomic_load_body(S)   ret= *a
 #define make_atomic_store_body(S)  *a= v
+#define MY_ATOMIC_MODE "gcc-builtins-up"
+
 #else
+#define MY_ATOMIC_MODE "gcc-builtins-smp"
 #define make_atomic_load_body(S) \
   ret= __sync_fetch_and_or(a, 0);
 #define make_atomic_store_body(S) \
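For readers unfamiliar with the GCC builtins this header maps onto, here is a minimal standalone sketch (not part of the patch; the demo names are illustrative) of the add, fetch-and-store ("fas") and CAS semantics the macro bodies above rely on. These are the documented GCC __sync primitives, available since GCC 4.1:

    /* demo.c - illustrative only; compile with: gcc demo.c */
    #include <stdio.h>

    int main(void)
    {
      int a= 10, v= 5, cmp= 10, set= 42, ret, sav;

      v= __sync_fetch_and_add(&a, v);      /* add: a: 10 -> 15, v gets old 10 */
      v= __sync_lock_test_and_set(&a, v);  /* fas: a: 15 -> 10, v gets old 15 */

      /* the make_atomic_cas_body pattern: install set if *a == cmp,
         otherwise report the current value back through cmp */
      sav= __sync_val_compare_and_swap(&a, cmp, set);
      if (!(ret= (sav == cmp)))
        cmp= sav;

      printf("a=%d v=%d ret=%d cmp=%d\n", a, v, ret, cmp); /* a=42 v=15 ret=1 cmp=10 */
      return 0;
    }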
diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h
new file mode 100644
index 00000000000..a84cde6b2c3
--- /dev/null
+++ b/include/atomic/generic-msvc.h
@@ -0,0 +1,134 @@
+/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef _atomic_h_cleanup_
+#define _atomic_h_cleanup_ "atomic/generic-msvc.h"
+
+/*
+  We don't implement anything specific for MY_ATOMIC_MODE_DUMMY, always use
+  intrinsics.
+  8 and 16-bit atomics are not implemented, but it can be done if necessary.
+*/
+#undef MY_ATOMIC_HAS_8_16
+
+#include <windows.h>
+/*
+  x86 compilers (both VS2003 and VS2005) never use intrinsics, but generate
+  function calls to kernel32 instead, even in the optimized build.
+  We force intrinsics as described in MSDN documentation for
+  _InterlockedCompareExchange.
+*/
+#ifdef _M_IX86
+
+#if (_MSC_VER >= 1500)
+#include <intrin.h>
+#else
+C_MODE_START
+/* Visual Studio 2003 and earlier do not have prototypes for atomic intrinsics */
+LONG _InterlockedCompareExchange (LONG volatile *Target, LONG Value, LONG Comp);
+LONGLONG _InterlockedCompareExchange64 (LONGLONG volatile *Target,
+                                        LONGLONG Value, LONGLONG Comp);
+C_MODE_END
+
+#pragma intrinsic(_InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedCompareExchange64)
+#endif
+
+#define InterlockedCompareExchange _InterlockedCompareExchange
+#define InterlockedCompareExchange64 _InterlockedCompareExchange64
+/*
+  No need to do anything special for InterlockedCompareExchangePointer,
+  as it is a #define to InterlockedCompareExchange. The same applies to
+  InterlockedExchangePointer.
+*/
+#endif /*_M_IX86*/
+
+#define MY_ATOMIC_MODE "msvc-intrinsics"
+/* Implement using CAS on WIN32 */
+#define IL_COMP_EXCHG32(X,Y,Z) \
+  InterlockedCompareExchange((volatile LONG *)(X),(Y),(Z))
+#define IL_COMP_EXCHG64(X,Y,Z) \
+  InterlockedCompareExchange64((volatile LONGLONG *)(X), \
+                               (LONGLONG)(Y),(LONGLONG)(Z))
+#define IL_COMP_EXCHGptr InterlockedCompareExchangePointer
+
+#define make_atomic_cas_body(S) \
+  int ## S initial_cmp= *cmp; \
+  int ## S initial_a= IL_COMP_EXCHG ## S (a, set, initial_cmp); \
+  if (!(ret= (initial_a == initial_cmp))) *cmp= initial_a;
+
+#ifndef _M_IX86
+/* Use full set of optimised functions on WIN64 */
+#define IL_EXCHG_ADD32(X,Y) \
+  InterlockedExchangeAdd((volatile LONG *)(X),(Y))
+#define IL_EXCHG_ADD64(X,Y) \
+  InterlockedExchangeAdd64((volatile LONGLONG *)(X),(LONGLONG)(Y))
+#define IL_EXCHG32(X,Y) \
+  InterlockedExchange((volatile LONG *)(X),(Y))
+#define IL_EXCHG64(X,Y) \
+  InterlockedExchange64((volatile LONGLONG *)(X),(LONGLONG)(Y))
+#define IL_EXCHGptr InterlockedExchangePointer
+
+#define make_atomic_add_body(S) \
+  v= IL_EXCHG_ADD ## S (a, v)
+#define make_atomic_swap_body(S) \
+  v= IL_EXCHG ## S (a, v)
+#define make_atomic_load_body(S) \
+  ret= 0; /* avoid compiler warning */ \
+  ret= IL_COMP_EXCHG ## S (a, ret, ret);
+#endif
+/*
+  my_yield_processor (equivalent of x86 PAUSE instruction) should be used
+  to improve performance on hyperthreaded CPUs. Intel recommends using it in
+  spin loops also on non-HT machines to reduce power consumption (see e.g.
+  http://softwarecommunity.intel.com/articles/eng/2004.htm)
+
+  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
+  and YieldProcessor shows that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
+  loop count in the range 200-300 brought best results.
+*/
+#ifndef YIELD_LOOPS
+#define YIELD_LOOPS 200
+#endif
+
+static __inline int my_yield_processor()
+{
+  int i;
+  for (i= 0; i < YIELD_LOOPS; i++)
+  {
+#if (_MSC_VER <= 1310)
+    /* On older compilers YieldProcessor is not available, use inline assembly */
+    __asm { rep nop }
+#else
+    YieldProcessor();
+#endif
+  }
+  return 1;
+}
+
+#define LF_BACKOFF my_yield_processor()
+#else /* cleanup */
+
+#undef IL_EXCHG_ADD32
+#undef IL_EXCHG_ADD64
+#undef IL_COMP_EXCHG32
+#undef IL_COMP_EXCHG64
+#undef IL_COMP_EXCHGptr
+#undef IL_EXCHG32
+#undef IL_EXCHG64
+#undef IL_EXCHGptr
+
+#endif
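The LF_BACKOFF hook defined above is what lock-free spin loops call when they lose a race. A hypothetical sketch of the intended usage pattern (the spinlock and its names are illustrative, not from the tree), built on the same Win32 interlocked primitives and the yield-in-a-loop backoff the comment describes:

    #include <windows.h>

    static volatile LONG lock_word= 0;

    static void spin_lock(void)
    {
      /* Try to move lock_word 0 -> 1; on contention, back off by
         yielding the pipeline for a while and retry. */
      while (InterlockedCompareExchange(&lock_word, 1, 0) != 0)
      {
        int i;
        for (i= 0; i < 200; i++)   /* YIELD_LOOPS-style busy backoff */
          YieldProcessor();
      }
    }

    static void spin_unlock(void)
    {
      InterlockedExchange(&lock_word, 0);  /* full-barrier store of 0 */
    }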
diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h
index d7d87167ade..5a0c41d9078 100644
--- a/include/atomic/nolock.h
+++ b/include/atomic/nolock.h
@@ -16,43 +16,46 @@
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
-#if defined(__i386__) || defined(_M_IX86) || defined(HAVE_GCC_ATOMIC_BUILTINS)
+#if defined(__i386__) || defined(_MSC_VER) || defined(__x86_64__) \
+    || defined(HAVE_GCC_ATOMIC_BUILTINS) \
+    || defined(HAVE_SOLARIS_ATOMIC)
 
-#ifdef MY_ATOMIC_MODE_DUMMY
-# define LOCK ""
-#else
-# define LOCK "lock"
-#endif
-
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
-#include "gcc_builtins.h"
-#elif __GNUC__
-#include "x86-gcc.h"
-#elif defined(_MSC_VER)
-#include "x86-msvc.h"
-#endif
-
-#elif defined(HAVE_SOLARIS_ATOMIC)
-
-#include "solaris.h"
-
-#endif /* __i386__ || _M_IX86 || HAVE_GCC_ATOMIC_BUILTINS */
-
-#if defined(make_atomic_cas_body) || defined(MY_ATOMICS_MADE)
+# ifdef MY_ATOMIC_MODE_DUMMY
+#  define LOCK_prefix ""
+# else
+#  define LOCK_prefix "lock"
+# endif
 
 /*
- * We have atomics that require no locking
- */
-#define MY_ATOMIC_NOLOCK
-
-#ifdef __SUNPRO_C
-/*
- * Sun Studio 12 (and likely earlier) does not accept a typedef struct {}
- */
-typedef char my_atomic_rwlock_t;
-#else
-typedef struct { } my_atomic_rwlock_t;
+  We choose the implementation as follows:
+  ----------------------------------------
+  On Windows with Visual C++ the native implementation is preferable.
+  With gcc we prefer the native x86 implementation; we prefer the
+  Solaris implementation over the gcc builtins for stability reasons,
+  and fall back to the gcc builtins if nothing else works. If neither
+  Visual C++ nor gcc is used, we still choose the Solaris
+  implementation on Solaris (mainly for SunStudio compiles).
+*/
+# if defined(_MSC_VER)
+#  include "generic-msvc.h"
+# elif __GNUC__
+#  if defined(__i386__) || defined(__x86_64__)
+#   include "x86-gcc.h"
+#  elif defined(HAVE_SOLARIS_ATOMIC)
+#   include "solaris.h"
+#  elif defined(HAVE_GCC_ATOMIC_BUILTINS)
+#   include "gcc_builtins.h"
+#  endif
+# elif defined(HAVE_SOLARIS_ATOMIC)
+#  include "solaris.h"
+# endif
 #endif
 
+#if defined(make_atomic_cas_body)
+/*
+  Type not used so minimal size (empty struct has different size between C
+  and C++, zero-length array is gcc-specific).
+*/
+typedef char my_atomic_rwlock_t __attribute__ ((unused));
 #define my_atomic_rwlock_destroy(name)
 #define my_atomic_rwlock_init(name)
 #define my_atomic_rwlock_rdlock(name)
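The net effect of this header, once a native implementation has defined make_atomic_cas_body, is that my_atomic_rwlock_t degenerates to an unused one-byte type and every lock macro expands to nothing. Callers still bracket atomic operations with the lock macros, so the same code also compiles in rwlock mode. A sketch of that caller-side contract, assuming the usual my_atomic.h API and the tree's normal includes (error handling omitted):

    #include "my_atomic.h"   /* assumes my_global.h et al. are included first */

    static my_atomic_rwlock_t counter_lock;  /* a plain char here: no-ops */
    static int32 counter= 0;

    void counter_increment(void)
    {
      my_atomic_rwlock_wrlock(&counter_lock);   /* compiles away */
      my_atomic_add32(&counter, 1);             /* native atomic add */
      my_atomic_rwlock_wrunlock(&counter_lock); /* compiles away */
    }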
diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h
index 0ff4d16c545..a31f8ed6ca1 100644
--- a/include/atomic/rwlock.h
+++ b/include/atomic/rwlock.h
@@ -1,7 +1,7 @@
 #ifndef ATOMIC_RWLOCK_INCLUDED
 #define ATOMIC_RWLOCK_INCLUDED
 
-/* Copyright (C) 2006 MySQL AB
+/* Copyright (C) 2006 MySQL AB, 2009 Sun Microsystems, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -16,7 +16,7 @@
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
-typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
+#define MY_ATOMIC_MODE_RWLOCKS 1
 
 #ifdef MY_ATOMIC_MODE_DUMMY
 /*
@@ -26,6 +26,9 @@
   implementations (another way is to run a UP build on an SMP box).
 */
 #warning MY_ATOMIC_MODE_DUMMY and MY_ATOMIC_MODE_RWLOCKS are incompatible
+
+typedef char my_atomic_rwlock_t;
+
 #define my_atomic_rwlock_destroy(name)
 #define my_atomic_rwlock_init(name)
 #define my_atomic_rwlock_rdlock(name)
@@ -33,18 +36,63 @@
 #define my_atomic_rwlock_rdunlock(name)
 #define my_atomic_rwlock_wrunlock(name)
 #define MY_ATOMIC_MODE "dummy (non-atomic)"
-#else
-#define my_atomic_rwlock_destroy(name)     pthread_rwlock_destroy(& (name)->rw)
-#define my_atomic_rwlock_init(name)        pthread_rwlock_init(& (name)->rw, 0)
-#define my_atomic_rwlock_rdlock(name)      pthread_rwlock_rdlock(& (name)->rw)
-#define my_atomic_rwlock_wrlock(name)      pthread_rwlock_wrlock(& (name)->rw)
-#define my_atomic_rwlock_rdunlock(name)    pthread_rwlock_unlock(& (name)->rw)
-#define my_atomic_rwlock_wrunlock(name)    pthread_rwlock_unlock(& (name)->rw)
-#define MY_ATOMIC_MODE "rwlocks"
+#else /* not MY_ATOMIC_MODE_DUMMY */
+
+typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t;
+
+#ifndef SAFE_MUTEX
+
+/*
+  We use read-write lock macros but map them to mutex locks, which are
+  faster. Still, with a semantically rich API we can change the
+  underlying implementation, if necessary.
+*/
+#define my_atomic_rwlock_destroy(name)     pthread_mutex_destroy(& (name)->rw)
+#define my_atomic_rwlock_init(name)        pthread_mutex_init(& (name)->rw, 0)
+#define my_atomic_rwlock_rdlock(name)      pthread_mutex_lock(& (name)->rw)
+#define my_atomic_rwlock_wrlock(name)      pthread_mutex_lock(& (name)->rw)
+#define my_atomic_rwlock_rdunlock(name)    pthread_mutex_unlock(& (name)->rw)
+#define my_atomic_rwlock_wrunlock(name)    pthread_mutex_unlock(& (name)->rw)
+
+#else /* SAFE_MUTEX */
+
+/*
+  SAFE_MUTEX pollutes the compiling name space with macros
+  that alter pthread_mutex_t, pthread_mutex_init, etc.
+  Atomic operations should never use the safe mutex wrappers.
+  Unfortunately, there is no way to have both:
+  - safe mutex macros expanding pthread_mutex_lock to safe_mutex_lock
+  - my_atomic macros expanding to unmodified pthread_mutex_lock
+  inlined in the same compilation unit.
+  So, in case of SAFE_MUTEX, a function call is required.
+  Given that SAFE_MUTEX is a debugging facility,
+  this extra function call is not a performance concern for
+  production builds.
+*/
+C_MODE_START
+extern void plain_pthread_mutex_init(safe_mutex_t *);
+extern void plain_pthread_mutex_destroy(safe_mutex_t *);
+extern void plain_pthread_mutex_lock(safe_mutex_t *);
+extern void plain_pthread_mutex_unlock(safe_mutex_t *);
+C_MODE_END
+
+#define my_atomic_rwlock_destroy(name)     plain_pthread_mutex_destroy(&(name)->rw)
+#define my_atomic_rwlock_init(name)        plain_pthread_mutex_init(&(name)->rw)
+#define my_atomic_rwlock_rdlock(name)      plain_pthread_mutex_lock(&(name)->rw)
+#define my_atomic_rwlock_wrlock(name)      plain_pthread_mutex_lock(&(name)->rw)
+#define my_atomic_rwlock_rdunlock(name)    plain_pthread_mutex_unlock(&(name)->rw)
+#define my_atomic_rwlock_wrunlock(name)    plain_pthread_mutex_unlock(&(name)->rw)
+
+#endif /* SAFE_MUTEX */
+
+#define MY_ATOMIC_MODE "mutex"
+#ifndef MY_ATOMIC_MODE_RWLOCKS
+#define MY_ATOMIC_MODE_RWLOCKS 1
+#endif
 #endif
 
 #define make_atomic_add_body(S)     int ## S sav; sav= *a; *a+= v; v=sav;
-#define make_atomic_swap_body(S)    int ## S sav; sav= *a; *a= v; v=sav;
+#define make_atomic_fas_body(S)     int ## S sav; sav= *a; *a= v; v=sav;
#define make_atomic_cas_body(S)     if ((ret= (*a == *cmp))) *a= set; else *cmp=*a;
 #define make_atomic_load_body(S)    ret= *a;
 #define make_atomic_store_body(S)   *a= v;
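The plain_pthread_mutex_* functions are only declared here; their definitions live elsewhere in mysys, in a translation unit compiled with the SAFE_MUTEX macro wrappers switched off. A hypothetical sketch of what such definitions look like, assuming safe_mutex_t keeps the raw lock in a member named mutex (both the file placement and the member name are assumptions, not shown by the patch):

    /* compiled in a unit where the SAFE_MUTEX wrappers are disabled,
       so these calls reach the raw pthread functions */
    #undef pthread_mutex_init
    #undef pthread_mutex_destroy
    #undef pthread_mutex_lock
    #undef pthread_mutex_unlock

    void plain_pthread_mutex_init(safe_mutex_t *m)
    { pthread_mutex_init(&m->mutex, NULL); }

    void plain_pthread_mutex_destroy(safe_mutex_t *m)
    { pthread_mutex_destroy(&m->mutex); }

    void plain_pthread_mutex_lock(safe_mutex_t *m)
    { pthread_mutex_lock(&m->mutex); }

    void plain_pthread_mutex_unlock(safe_mutex_t *m)
    { pthread_mutex_unlock(&m->mutex); }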
diff --git a/include/atomic/solaris.h b/include/atomic/solaris.h
index 4c51253d2d5..fc9f369c707 100644
--- a/include/atomic/solaris.h
+++ b/include/atomic/solaris.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008 MySQL AB
+/* Copyright (C) 2008 MySQL AB, 2009 Sun Microsystems, Inc
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -13,198 +13,54 @@
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+#ifndef _atomic_h_cleanup_
+#define _atomic_h_cleanup_ "atomic/solaris.h"
+
 #include <atomic.h>
 
 #define MY_ATOMIC_MODE "solaris-atomic"
 
-/*
- * This is defined to indicate we fully define the my_atomic_* (inline)
- * functions here, so there is no need to "make" them in my_atomic.h
- * using make_atomic_* and make_atomic_*_body.
- */
-#define MY_ATOMICS_MADE
-
-STATIC_INLINE int
-my_atomic_cas8(int8 volatile *a, int8 *cmp, int8 set)
-{
-  int ret;
-  int8 sav;
-  sav = (int8) atomic_cas_8((volatile uint8_t *)a, (uint8_t)*cmp,
-                            (uint8_t)set);
-  if (! (ret = (sav == *cmp)))
-    *cmp = sav;
-  return ret;
-}
-
-STATIC_INLINE int
-my_atomic_cas16(int16 volatile *a, int16 *cmp, int16 set)
-{
-  int ret;
-  int16 sav;
-  sav = (int16) atomic_cas_16((volatile uint16_t *)a, (uint16_t)*cmp,
-                              (uint16_t)set);
-  if (! (ret = (sav == *cmp)))
-    *cmp = sav;
-  return ret;
-}
-
-STATIC_INLINE int
-my_atomic_cas32(int32 volatile *a, int32 *cmp, int32 set)
-{
-  int ret;
-  int32 sav;
-  sav = (int32) atomic_cas_32((volatile uint32_t *)a, (uint32_t)*cmp,
-                              (uint32_t)set);
-  if (! (ret = (sav == *cmp)))
-    *cmp = sav;
-  return ret;
-}
-
-STATIC_INLINE int
-my_atomic_casptr(void * volatile *a, void **cmp, void *set)
-{
-  int ret;
-  void *sav;
-  sav = atomic_cas_ptr(a, *cmp, set);
-  if (! (ret = (sav == *cmp)))
-    *cmp = sav;
-  return ret;
-}
+#define uintptr_t void *
+#define atomic_or_ptr_nv(X,Y) (void *)atomic_or_ulong_nv((volatile ulong_t *)X, Y)
 
-/* ------------------------------------------------------------------------ */
+#define make_atomic_cas_body(S) \
+  uint ## S ## _t sav; \
+  sav = atomic_cas_ ## S( \
+          (volatile uint ## S ## _t *)a, \
+          (uint ## S ## _t)*cmp, \
+          (uint ## S ## _t)set); \
+  if (! (ret= (sav == *cmp))) \
+    *cmp= sav;
 
-STATIC_INLINE int8
-my_atomic_add8(int8 volatile *a, int8 v)
-{
-  int8 nv;
-  nv = atomic_add_8_nv((volatile uint8_t *)a, v);
-  return (nv - v);
-}
-
-STATIC_INLINE int16
-my_atomic_add16(int16 volatile *a, int16 v)
-{
-  int16 nv;
-  nv = atomic_add_16_nv((volatile uint16_t *)a, v);
-  return (nv - v);
-}
-
-STATIC_INLINE int32
-my_atomic_add32(int32 volatile *a, int32 v)
-{
-  int32 nv;
-  nv = atomic_add_32_nv((volatile uint32_t *)a, v);
-  return (nv - v);
-}
+#define make_atomic_add_body(S) \
+  int ## S nv;  /* new value */ \
+  nv= atomic_add_ ## S ## _nv((volatile uint ## S ## _t *)a, v); \
+  v= nv - v
 
 /* ------------------------------------------------------------------------ */
 
 #ifdef MY_ATOMIC_MODE_DUMMY
 
-STATIC_INLINE int8
-my_atomic_load8(int8 volatile *a) { return (*a); }
-
-STATIC_INLINE int16
-my_atomic_load16(int16 volatile *a) { return (*a); }
-
-STATIC_INLINE int32
-my_atomic_load32(int32 volatile *a) { return (*a); }
-
-STATIC_INLINE void *
-my_atomic_loadptr(void * volatile *a) { return (*a); }
-
-/* ------------------------------------------------------------------------ */
-
-STATIC_INLINE void
-my_atomic_store8(int8 volatile *a, int8 v) { *a = v; }
+#define make_atomic_load_body(S)   ret= *a
+#define make_atomic_store_body(S)  *a= v
 
-STATIC_INLINE void
-my_atomic_store16(int16 volatile *a, int16 v) { *a = v; }
-
-STATIC_INLINE void
-my_atomic_store32(int32 volatile *a, int32 v) { *a = v; }
+#else /* MY_ATOMIC_MODE_DUMMY */
 
-STATIC_INLINE void
-my_atomic_storeptr(void * volatile *a, void *v) { *a = v; }
+#define make_atomic_load_body(S) \
+  ret= atomic_or_ ## S ## _nv((volatile uint ## S ## _t *)a, 0)
 
-/* ------------------------------------------------------------------------ */
+#define make_atomic_store_body(S) \
+  (void) atomic_swap_ ## S((volatile uint ## S ## _t *)a, (uint ## S ## _t)v)
 
-#else /* MY_ATOMIC_MODE_DUMMY */
+#endif
 
-STATIC_INLINE int8
-my_atomic_load8(int8 volatile *a)
-{
-  return ((int8) atomic_or_8_nv((volatile uint8_t *)a, 0));
-}
-
-STATIC_INLINE int16
-my_atomic_load16(int16 volatile *a)
-{
-  return ((int16) atomic_or_16_nv((volatile uint16_t *)a, 0));
-}
-
-STATIC_INLINE int32
-my_atomic_load32(int32 volatile *a)
-{
-  return ((int32) atomic_or_32_nv((volatile uint32_t *)a, 0));
-}
-
-STATIC_INLINE void *
-my_atomic_loadptr(void * volatile *a)
-{
-  return ((void *) atomic_or_ulong_nv((volatile ulong_t *)a, 0));
-}
+#define make_atomic_fas_body(S) \
+  v= atomic_swap_ ## S((volatile uint ## S ## _t *)a, (uint ## S ## _t)v)
 
-/* ------------------------------------------------------------------------ */
+#else /* cleanup */
 
-STATIC_INLINE void
-my_atomic_store8(int8 volatile *a, int8 v)
-{
-  (void) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v);
-}
-
-STATIC_INLINE void
-my_atomic_store16(int16 volatile *a, int16 v)
-{
-  (void) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v);
-}
-
-STATIC_INLINE void
-my_atomic_store32(int32 volatile *a, int32 v)
-{
-  (void) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v);
-}
-
-STATIC_INLINE void
-my_atomic_storeptr(void * volatile *a, void *v)
-{
-  (void) atomic_swap_ptr(a, v);
-}
+#undef uintptr_t
+#undef atomic_or_ptr_nv
 
 #endif
 
-/* ------------------------------------------------------------------------ */
-
-STATIC_INLINE int8
-my_atomic_swap8(int8 volatile *a, int8 v)
-{
-  return ((int8) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v));
-}
-
-STATIC_INLINE int16
-my_atomic_swap16(int16 volatile *a, int16 v)
-{
-  return ((int16) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v));
-}
-
-STATIC_INLINE int32
-my_atomic_swap32(int32 volatile *a, int32 v)
-{
-  return ((int32) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v));
-}
-
-STATIC_INLINE void *
-my_atomic_swapptr(void * volatile *a, void *v)
-{
-  return (atomic_swap_ptr(a, v));
-}
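To see what the generated code amounts to after this change, here is a hand expansion of make_atomic_cas_body(32) wrapped in a function (illustrative only; the real function is produced by the make_atomic_* machinery in my_atomic.h, and the int32 typedef stands in for the tree's own):

    #include <atomic.h>    /* Solaris atomic_cas_32() */
    #include <inttypes.h>

    typedef int32_t int32;  /* stand-in for the tree's typedef */

    static inline int my_atomic_cas32_expanded(int32 volatile *a,
                                               int32 *cmp, int32 set)
    {
      int ret;
      uint32_t sav;
      /* atomic_cas_32 returns the value *a held before the operation */
      sav= atomic_cas_32((volatile uint32_t *)a, (uint32_t)*cmp,
                         (uint32_t)set);
      if (!(ret= (sav == (uint32_t)*cmp)))
        *cmp= (int32)sav;   /* failed: report the current value back */
      return ret;
    }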
diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h
index c3029f9c1b4..61b94a48568 100644
--- a/include/atomic/x86-gcc.h
+++ b/include/atomic/x86-gcc.h
@@ -22,10 +22,24 @@
   architectures support double-word (128-bit) cas.
 */
 
-#ifdef MY_ATOMIC_NO_XADD
-#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd"
+/*
+  No special support for 8- and 16-bit operations is implemented here
+  currently.
+*/
+#undef MY_ATOMIC_HAS_8_AND_16
+
+#ifdef __x86_64__
+# ifdef MY_ATOMIC_NO_XADD
+#  define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd"
+# else
+#  define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix
+# endif
 #else
-#define MY_ATOMIC_MODE "gcc-x86" LOCK
+# ifdef MY_ATOMIC_NO_XADD
+#  define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd"
+# else
+#  define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix
+# endif
 #endif
 
 /* fix -ansi errors while maintaining readability */
@@ -34,29 +48,79 @@
 #endif
 
 #ifndef MY_ATOMIC_NO_XADD
-#define make_atomic_add_body(S) \
-  asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
+#define make_atomic_add_body(S)   make_atomic_add_body ## S
+#define make_atomic_cas_body(S)   make_atomic_cas_body ## S
 #endif
-#define make_atomic_swap_body(S) \
-  asm volatile ("; xchg %0, %1;" : "+q" (v) , "+m" (*a))
-#define make_atomic_cas_body(S) \
-  asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \
+
+#define make_atomic_add_body32 \
+  asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
+
+#define make_atomic_cas_body32 \
+  asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \
        : "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set))
-#ifdef MY_ATOMIC_MODE_DUMMY
-#define make_atomic_load_body(S)   ret=*a
-#define make_atomic_store_body(S)  *a=v
-#else
+
+#ifdef __x86_64__
+#define make_atomic_add_body64 make_atomic_add_body32
+#define make_atomic_cas_body64 make_atomic_cas_body32
+
+#define make_atomic_fas_body(S) \
+  asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a))
+
 /*
   Actually 32-bit reads/writes are always atomic on x86
-  But we add LOCK here anyway to force memory barriers
+  But we add LOCK_prefix here anyway to force memory barriers
+*/
+#define make_atomic_load_body(S) \
+  ret=0; \
+  asm volatile (LOCK_prefix "; cmpxchg %2, %0" \
+       : "+m" (*a), "+a" (ret): "r" (ret))
+#define make_atomic_store_body(S) \
+  asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v))
+
+#else
+/*
+  Use default implementations of 64-bit operations: since we solved
+  the 64-bit problem for CAS on 32-bit platforms, there is no need to
+  solve it once more for ADD, LOAD, STORE and FAS as well.
+  Since we already added add32 support, we need to define add64
+  here, but we haven't defined fas, load and store at all, so
+  we can fall back on the default implementations.
 */
-#define make_atomic_load_body(S) \
-  ret=0; \
-  asm volatile (LOCK "; cmpxchg %2, %0" \
-       : "+m" (*a), "+a" (ret): "r" (ret))
-#define make_atomic_store_body(S) \
-  asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v))
+#define make_atomic_add_body64 \
+  int64 tmp=*a; \
+  while (!my_atomic_cas64(a, &tmp, tmp+v)) ; \
+  v=tmp;
+
+/*
+  On some platforms (e.g. Mac OS X and Solaris) the ebx register
+  is held as a pointer to the global offset table. Thus we're not
+  allowed to use the b-register on those platforms when compiling
+  PIC code; to avoid this we push and pop ebx and add a movl
+  instruction so that ebx does not appear in the interface of the
+  assembler instruction.
+
+  cmpxchg8b works on both 32-bit and 64-bit platforms, but the code
+  here is only used on 32-bit platforms; on 64-bit platforms the much
+  simpler make_atomic_cas_body32 will work fine.
+*/
+#define make_atomic_cas_body64 \
+  int32 ebx=(set & 0xFFFFFFFF), ecx=(set >> 32); \
+  asm volatile ("push %%ebx; movl %3, %%ebx;" \
+                LOCK_prefix "; cmpxchg8b %0; setz %2; pop %%ebx" \
+                : "+m" (*a), "+A" (*cmp), "=c" (ret) \
+                : "m" (ebx), "c" (ecx))
 #endif
 
+/*
+  make_atomic_cas_body32 adapts to the OS word size, so on 64-bit
+  platforms it automatically operates on 64-bit values; it can
+  therefore serve as the pointer CAS on both word sizes.
+*/
+#define make_atomic_cas_bodyptr make_atomic_cas_body32
+
+#ifdef MY_ATOMIC_MODE_DUMMY
+#define make_atomic_load_body(S)   ret=*a
+#define make_atomic_store_body(S)  *a=v
+#endif
 #endif /* ATOMIC_X86_GCC_INCLUDED */
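The make_atomic_add_body64 fallback above is the classic CAS-loop shape for fetch-and-add. Written out with GCC builtins for illustration (the patch itself loops through my_atomic_cas64, which updates its cmp argument on failure, so its loop body can stay empty; the function name here is illustrative):

    #include <stdint.h>

    /* sketch; on 32-bit x86 gcc lowers the 8-byte builtin to
       lock cmpxchg8b, the same instruction the macro above emits */
    static int64_t fetch_and_add64(volatile int64_t *a, int64_t v)
    {
      int64_t tmp= *a;               /* optimistic read of the old value */
      while (!__sync_bool_compare_and_swap(a, tmp, tmp + v))
        tmp= *a;                     /* lost the race: re-read and retry */
      return tmp;                    /* old value, as xadd would return */
    }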
diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h
deleted file mode 100644
index c4885bb8451..00000000000
--- a/include/atomic/x86-msvc.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Copyright (C) 2006 MySQL AB
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
-
-/*
-  XXX 64-bit atomic operations can be implemented using
-  cmpxchg8b, if necessary
-*/
-
-// Would it be better to use intrinsics ?
-// (InterlockedCompareExchange, InterlockedCompareExchange16
-// InterlockedExchangeAdd, InterlockedExchange)
-
-#ifndef _atomic_h_cleanup_
-#define _atomic_h_cleanup_ "atomic/x86-msvc.h"
-
-#define MY_ATOMIC_MODE "msvc-x86" LOCK
-
-#define make_atomic_add_body(S) \
-  _asm { \
-    _asm mov   reg_ ## S, v \
-    _asm LOCK  xadd *a, reg_ ## S \
-    _asm movzx v, reg_ ## S \
-  }
-#define make_atomic_cas_body(S) \
-  _asm { \
-    _asm mov    areg_ ## S, *cmp \
-    _asm mov    reg2_ ## S, set \
-    _asm LOCK cmpxchg *a, reg2_ ## S \
-    _asm mov    *cmp, areg_ ## S \
-    _asm setz   al \
-    _asm movzx  ret, al \
-  }
-#define make_atomic_swap_body(S) \
-  _asm { \
-    _asm mov    reg_ ## S, v \
-    _asm xchg   *a, reg_ ## S \
-    _asm mov    v, reg_ ## S \
-  }
-
-#ifdef MY_ATOMIC_MODE_DUMMY
-#define make_atomic_load_body(S)   ret=*a
-#define make_atomic_store_body(S)  *a=v
-#else
-/*
-  Actually 32-bit reads/writes are always atomic on x86
-  But we add LOCK here anyway to force memory barriers
-*/
-#define make_atomic_load_body(S) \
-  _asm { \
-    _asm mov areg_ ## S, 0 \
-    _asm mov reg2_ ## S, areg_ ## S \
-    _asm LOCK cmpxchg *a, reg2_ ## S \
-    _asm mov ret, areg_ ## S \
-  }
-#define make_atomic_store_body(S) \
-  _asm { \
-    _asm mov    reg_ ## S, v \
-    _asm xchg   *a, reg_ ## S \
-  }
-#endif
-
-#define reg_8    al
-#define reg_16   ax
-#define reg_32   eax
-#define areg_8   al
-#define areg_16  ax
-#define areg_32  eax
-#define reg2_8   bl
-#define reg2_16  bx
-#define reg2_32  ebx
-
-#else /* cleanup */
-
-#undef reg_8
-#undef reg_16
-#undef reg_32
-#undef areg_8
-#undef areg_16
-#undef areg_32
-#undef reg2_8
-#undef reg2_16
-#undef reg2_32
-
-#endif