-rw-r--r-- | include/atomic/generic-msvc.h      | 30
-rw-r--r-- | include/lf.h                       |  1
-rw-r--r-- | include/my_atomic.h                |  9
-rw-r--r-- | include/my_cpu.h                   | 57
-rw-r--r-- | mysys/lf_alloc-pin.c               |  4
-rw-r--r-- | mysys/lf_hash.c                    |  6
-rw-r--r-- | mysys/waiting_threads.c            |  2
-rw-r--r-- | storage/innobase/include/os0once.h |  3
-rw-r--r-- | storage/innobase/include/ut0ut.h   | 29
-rw-r--r-- | storage/innobase/ut/ut0ut.cc       |  2
10 files changed, 67 insertions, 76 deletions
diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h
index 6c5272c98f4..8daa497036f 100644
--- a/include/atomic/generic-msvc.h
+++ b/include/atomic/generic-msvc.h
@@ -90,37 +90,7 @@ C_MODE_END
   ret= 0; /* avoid compiler warning */          \
   ret= IL_COMP_EXCHG ## S (a, ret, ret);
 #endif
-/*
-  my_yield_processor (equivalent of x86 PAUSE instruction) should be used
-  to improve performance on hyperthreaded CPUs. Intel recommends to use it in
-  spin loops also on non-HT machines to reduce power consumption (see e.g
-  http://softwarecommunity.intel.com/articles/eng/2004.htm)
-
-  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
-  and YieldProcessor shows that much better performance is achieved by calling
-  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
-  loop count in the range 200-300 brought best results.
- */
-#ifndef YIELD_LOOPS
-#define YIELD_LOOPS 200
-#endif
-
-static __inline int my_yield_processor()
-{
-  int i;
-  for(i=0; i<YIELD_LOOPS; i++)
-  {
-#if (_MSC_VER <= 1310)
-    /* On older compilers YieldProcessor is not available, use inline assembly*/
-    __asm { rep nop }
-#else
-    YieldProcessor();
-#endif
-  }
-  return 1;
-}
-#define LF_BACKOFF my_yield_processor()
 
 #else /* cleanup */
 
 #undef IL_EXCHG_ADD32
diff --git a/include/lf.h b/include/lf.h
index 1825de62b43..a9d7e9ee688 100644
--- a/include/lf.h
+++ b/include/lf.h
@@ -17,6 +17,7 @@
 #define INCLUDE_LF_INCLUDED
 
 #include <my_atomic.h>
+#include <my_cpu.h>
 
 C_MODE_START
 
diff --git a/include/my_atomic.h b/include/my_atomic.h
index 8f13a0ab89b..c6abcda2d62 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -346,15 +346,6 @@ make_atomic_store(ptr)
 #undef make_atomic_fas_body
 #undef intptr
 
-/*
-  the macro below defines (as an expression) the code that
-  will be run in spin-loops. Intel manuals recummend to have PAUSE there.
-  It is expected to be defined in include/atomic/ *.h files
-*/
-#ifndef LF_BACKOFF
-#define LF_BACKOFF (1)
-#endif
-
 #define MY_ATOMIC_OK 0
 #define MY_ATOMIC_NOT_1CPU 1
 extern int my_atomic_initialize();
diff --git a/include/my_cpu.h b/include/my_cpu.h
index 026b92c1b74..856a8e9b04a 100644
--- a/include/my_cpu.h
+++ b/include/my_cpu.h
@@ -1,3 +1,5 @@
+#ifndef MY_CPU_INCLUDED
+#define MY_CPU_INCLUDED
 /* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
 
    This program is free software; you can redistribute it and/or modify
@@ -42,3 +44,58 @@
 #define HMT_medium_high()
 #define HMT_high()
 #endif
+
+
+static inline void MY_RELAX_CPU(void)
+{
+#ifdef HAVE_PAUSE_INSTRUCTION
+  /*
+    According to the gcc info page, asm volatile means that the
+    instruction has important side-effects and must not be removed.
+    Also asm volatile may trigger a memory barrier (spilling all registers
+    to memory).
+  */
+#ifdef __SUNPRO_CC
+  asm ("pause" );
+#else
+  __asm__ __volatile__ ("pause");
+#endif
+
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+  __asm__ __volatile__ ("rep; nop");
+#elif defined _WIN32
+  /*
+    In the Win32 API, the x86 PAUSE instruction is executed by calling
+    the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+    independent way by using YieldProcessor.
+  */
+  YieldProcessor();
+#elif defined(_ARCH_PWR8)
+  __ppc_get_timebase();
+#else
+  int32 var, oldval= 0;
+  my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
+                                  MY_MEMORY_ORDER_RELAXED);
+#endif
+}
+
+
+/*
+  LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
+  recommends to use it in spin loops also on non-HT machines to reduce power
+  consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
+
+  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
+  and YieldProcessor shows that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
+  loop count in the range 200-300 brought best results.
+*/
+
+static inline int LF_BACKOFF(void)
+{
+  int i;
+  for (i= 0; i < 200; i++)
+    MY_RELAX_CPU();
+  return 1;
+}
+#endif
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index bf2b8a12846..8a96fccf16a 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
   {
     anext_node(last)= tmp.node;
   } while (!my_atomic_casptr((void **)(char *)&allocator->top,
-                             (void **)&tmp.ptr, first) && LF_BACKOFF);
+                             (void **)&tmp.ptr, first) && LF_BACKOFF());
 }
 
 /*
@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
     {
       node= allocator->top;
       lf_pin(pins, 0, node);
-    } while (node != allocator->top && LF_BACKOFF);
+    } while (node != allocator->top && LF_BACKOFF());
     if (!node)
     {
       node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
index 430f1007f30..6b3fa78475d 100644
--- a/mysys/lf_hash.c
+++ b/mysys/lf_hash.c
@@ -102,7 +102,7 @@ retry:
   do { /* PTR() isn't necessary below, head is a dummy node */
     cursor->curr= (LF_SLIST *)(*cursor->prev);
     lf_pin(pins, 1, cursor->curr);
-  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
   for (;;)
   {
@@ -117,7 +117,7 @@ retry:
       link= cursor->curr->link;
       cursor->next= PTR(link);
       lf_pin(pins, 0, cursor->next);
-    } while (link != cursor->curr->link && LF_BACKOFF);
+    } while (link != cursor->curr->link && LF_BACKOFF());
 
     if (!DELETED(link))
     {
@@ -145,7 +145,7 @@ retry:
          and remove this deleted node
        */
        if (my_atomic_casptr((void **) cursor->prev,
-                            (void **) &cursor->curr, cursor->next) && LF_BACKOFF)
+                            (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
         lf_alloc_free(pins, cursor->curr);
       else
         goto retry;
diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c
index 2549bd8a587..6a4139844db 100644
--- a/mysys/waiting_threads.c
+++ b/mysys/waiting_threads.c
@@ -617,7 +617,7 @@ retry:
   {
     rc= *shared_ptr;
     lf_pin(arg->thd->pins, 0, rc);
-  } while (rc != *shared_ptr && LF_BACKOFF);
+  } while (rc != *shared_ptr && LF_BACKOFF());
 
   if (rc == 0)
   {
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index 05a45a69f33..551e78d24ba 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
 
 #include "univ.i"
 #include "ut0ut.h"
+#include "my_cpu.h"
 
 /** Execute a given function exactly once in a multi-threaded environment
 or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ public:
 				ut_error;
 			}
 
-			UT_RELAX_CPU();
+			MY_RELAX_CPU();
 		}
 	}
 }
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 4e9c2599933..352f5cce83d 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
 /** Time stamp */
 typedef time_t ib_time_t;
 
-#ifdef HAVE_PAUSE_INSTRUCTION
-	/* According to the gcc info page, asm volatile means that the
-	instruction has important side-effects and must not be removed.
-	Also asm volatile may trigger a memory barrier (spilling all registers
-	to memory). */
-# ifdef __SUNPRO_CC
-#  define UT_RELAX_CPU() asm ("pause" )
-# else
-#  define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
-	/* In the Win32 API, the x86 PAUSE instruction is executed by calling
-	the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
-	independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
-	volatile int32 volatile_var; \
-	int32 oldval= 0; \
-	my_atomic_cas32(&volatile_var, &oldval, 1); \
-	} while (0)
-#endif
-
 #if defined (__GNUC__)
 # define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
 #elif defined (_MSC_VER)
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 28e327a2a77..88c5c889c8d 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -293,7 +293,7 @@ ut_delay(
 	UT_LOW_PRIORITY_CPU();
 
 	for (i = 0; i < delay * 50; i++) {
-		UT_RELAX_CPU();
+		MY_RELAX_CPU();
 		UT_COMPILER_BARRIER();
 	}
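For readers unfamiliar with the pattern this patch centralizes: both MY_RELAX_CPU() and LF_BACKOFF() exist to be called inside busy-wait loops, so that a spinning hyper-thread briefly yields execution resources to its sibling instead of hammering the cache line it is waiting on. The sketch below illustrates that usage in a minimal test-and-test-and-set spin lock. It is illustrative only, written against C11 atomics rather than the server's my_atomic/my_cpu API; the names spin_lock_t, spin_acquire(), spin_release() and relax_cpu() are hypothetical and do not appear in the patch, where the equivalent role is played by MY_RELAX_CPU() and LF_BACKOFF() from include/my_cpu.h.

/*
  Minimal sketch of a test-and-test-and-set spin lock using a PAUSE-style
  relax primitive in the busy-wait loop.  Hypothetical names; not part of
  the MariaDB patch above.
*/
#include <stdatomic.h>

typedef struct { atomic_int locked; } spin_lock_t;

static inline void relax_cpu(void)
{
#if defined(__x86_64__) || defined(__i386__)
  /* same idea as MY_RELAX_CPU(): hint the CPU that we are spinning */
  __asm__ __volatile__ ("pause");
#else
  /* fallback: a plain compiler barrier on other architectures */
  __asm__ __volatile__ ("" ::: "memory");
#endif
}

static inline void spin_acquire(spin_lock_t *lock)
{
  for (;;)
  {
    /* try to take the lock with a single atomic exchange */
    if (!atomic_exchange_explicit(&lock->locked, 1, memory_order_acquire))
      return;
    /* lock is held: spin on a plain load, relaxing the CPU each round,
       so the waiting hyper-thread does not starve its sibling */
    while (atomic_load_explicit(&lock->locked, memory_order_relaxed))
      relax_cpu();
  }
}

static inline void spin_release(spin_lock_t *lock)
{
  atomic_store_explicit(&lock->locked, 0, memory_order_release);
}

Note the design choice the old generic-msvc.h comment describes and the new LF_BACKOFF() keeps: rather than issuing a single PAUSE per retry, LF_BACKOFF() runs the relax instruction in a loop of 200 iterations, yielding for longer between retries, which the original benchmarking found performed best on Intel hardware. It also returns 1 so it can sit as the last operand of the && chains in the lf_alloc/lf_hash retry conditions without changing their truth value.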