-rw-r--r--  include/atomic/generic-msvc.h       | 30
-rw-r--r--  include/lf.h                        |  1
-rw-r--r--  include/my_atomic.h                 |  9
-rw-r--r--  include/my_cpu.h                    | 57
-rw-r--r--  mysys/lf_alloc-pin.c                |  4
-rw-r--r--  mysys/lf_hash.c                     |  6
-rw-r--r--  mysys/waiting_threads.c             |  2
-rw-r--r--  storage/innobase/include/os0once.h  |  3
-rw-r--r--  storage/innobase/include/ut0ut.h    | 29
-rw-r--r--  storage/innobase/ut/ut0ut.cc        |  2
10 files changed, 67 insertions(+), 76 deletions(-)
diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h
index 6c5272c98f4..8daa497036f 100644
--- a/include/atomic/generic-msvc.h
+++ b/include/atomic/generic-msvc.h
@@ -90,37 +90,7 @@ C_MODE_END
ret= 0; /* avoid compiler warning */ \
ret= IL_COMP_EXCHG ## S (a, ret, ret);
#endif
-/*
- my_yield_processor (equivalent of x86 PAUSE instruction) should be used
- to improve performance on hyperthreaded CPUs. Intel recommends to use it in
- spin loops also on non-HT machines to reduce power consumption (see e.g
- http://softwarecommunity.intel.com/articles/eng/2004.htm)
-
- Running benchmarks for spinlocks implemented with InterlockedCompareExchange
- and YieldProcessor shows that much better performance is achieved by calling
- YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
- loop count in the range 200-300 brought best results.
- */
-#ifndef YIELD_LOOPS
-#define YIELD_LOOPS 200
-#endif
-
-static __inline int my_yield_processor()
-{
- int i;
- for(i=0; i<YIELD_LOOPS; i++)
- {
-#if (_MSC_VER <= 1310)
- /* On older compilers YieldProcessor is not available, use inline assembly*/
- __asm { rep nop }
-#else
- YieldProcessor();
-#endif
- }
- return 1;
-}
-#define LF_BACKOFF my_yield_processor()
#else /* cleanup */
#undef IL_EXCHG_ADD32
diff --git a/include/lf.h b/include/lf.h
index 1825de62b43..a9d7e9ee688 100644
--- a/include/lf.h
+++ b/include/lf.h
@@ -17,6 +17,7 @@
#define INCLUDE_LF_INCLUDED
#include <my_atomic.h>
+#include <my_cpu.h>
C_MODE_START
diff --git a/include/my_atomic.h b/include/my_atomic.h
index 8f13a0ab89b..c6abcda2d62 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -346,15 +346,6 @@ make_atomic_store(ptr)
#undef make_atomic_fas_body
#undef intptr
-/*
- the macro below defines (as an expression) the code that
- will be run in spin-loops. Intel manuals recummend to have PAUSE there.
- It is expected to be defined in include/atomic/ *.h files
-*/
-#ifndef LF_BACKOFF
-#define LF_BACKOFF (1)
-#endif
-
#define MY_ATOMIC_OK 0
#define MY_ATOMIC_NOT_1CPU 1
extern int my_atomic_initialize();
diff --git a/include/my_cpu.h b/include/my_cpu.h
index 026b92c1b74..856a8e9b04a 100644
--- a/include/my_cpu.h
+++ b/include/my_cpu.h
@@ -1,3 +1,5 @@
+#ifndef MY_CPU_INCLUDED
+#define MY_CPU_INCLUDED
/* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
This program is free software; you can redistribute it and/or modify
@@ -42,3 +44,58 @@
#define HMT_medium_high()
#define HMT_high()
#endif
+
+
+static inline void MY_RELAX_CPU(void)
+{
+#ifdef HAVE_PAUSE_INSTRUCTION
+ /*
+ According to the gcc info page, asm volatile means that the
+ instruction has important side-effects and must not be removed.
+ Also asm volatile may trigger a memory barrier (spilling all registers
+ to memory).
+ */
+#ifdef __SUNPRO_CC
+ asm ("pause" );
+#else
+ __asm__ __volatile__ ("pause");
+#endif
+
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+ __asm__ __volatile__ ("rep; nop");
+#elif defined _WIN32
+ /*
+    In the Win32 API, the x86 PAUSE instruction is executed by calling
+    the YieldProcessor macro defined in WinNT.h. Using YieldProcessor
+    keeps this code CPU architecture independent.
+ */
+ YieldProcessor();
+#elif defined(_ARCH_PWR8)
+ __ppc_get_timebase();
+#else
+ int32 var, oldval = 0;
+ my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
+ MY_MEMORY_ORDER_RELAXED);
+#endif
+}
+
+
+/*
+  LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
+  recommends using it in spin loops on non-HT machines as well, to reduce power
+  consumption (see e.g. http://softwarecommunity.intel.com/articles/eng/2004.htm)
+
+  Benchmarks of spinlocks implemented with InterlockedCompareExchange and
+  YieldProcessor show that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding for longer. On Intel boxes a
+  loop count in the range 200-300 gave the best results.
+*/
+
+static inline int LF_BACKOFF(void)
+{
+ int i;
+ for (i= 0; i < 200; i++)
+ MY_RELAX_CPU();
+ return 1;
+}
+#endif
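
The new LF_BACKOFF() deliberately returns 1, so call sites can chain it into
a retry condition with && without changing the loop's logic; this is why the
call sites below go from LF_BACKOFF (a macro expression) to LF_BACKOFF() (a
function call). A minimal sketch of the intended idiom, assuming my_cpu.h is
available and substituting C11 atomics for the my_atomic wrappers:

    #include <stdatomic.h>
    #include <my_cpu.h>

    static _Atomic int lock_word;

    static void spin_lock_sketch(void)
    {
      int expected= 0;
      /* on CAS failure, LF_BACKOFF() runs ~200 pause instructions and
         returns 1, so the && keeps the loop spinning */
      while (!atomic_compare_exchange_weak(&lock_word, &expected, 1) &&
             LF_BACKOFF())
        expected= 0;          /* a failed CAS overwrote 'expected' */
    }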
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index bf2b8a12846..8a96fccf16a 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
{
anext_node(last)= tmp.node;
} while (!my_atomic_casptr((void **)(char *)&allocator->top,
- (void **)&tmp.ptr, first) && LF_BACKOFF);
+ (void **)&tmp.ptr, first) && LF_BACKOFF());
}
/*
@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
{
node= allocator->top;
lf_pin(pins, 0, node);
- } while (node != allocator->top && LF_BACKOFF);
+ } while (node != allocator->top && LF_BACKOFF());
if (!node)
{
node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
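
The second hunk shows the load-pin-validate idiom that recurs in this file:
read a shared pointer, pin it, then re-check that it still holds the same
value, backing off between attempts. Distilled into a standalone helper (a
sketch only; 'top' stands in for the allocator's stack head, while LF_PINS
and lf_pin are the real lf.h API):

    static void *load_and_pin(void *volatile *top, LF_PINS *pins)
    {
      void *node;
      do
      {
        node= *top;
        lf_pin(pins, 0, node);    /* protect node from being freed */
      } while (node != *top &&    /* did top move underneath us? */
               LF_BACKOFF());     /* yes: relax the CPU, then retry */
      return node;
    }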
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
index 430f1007f30..6b3fa78475d 100644
--- a/mysys/lf_hash.c
+++ b/mysys/lf_hash.c
@@ -102,7 +102,7 @@ retry:
do { /* PTR() isn't necessary below, head is a dummy node */
cursor->curr= (LF_SLIST *)(*cursor->prev);
lf_pin(pins, 1, cursor->curr);
- } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+ } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
for (;;)
{
@@ -117,7 +117,7 @@ retry:
link= cursor->curr->link;
cursor->next= PTR(link);
lf_pin(pins, 0, cursor->next);
- } while (link != cursor->curr->link && LF_BACKOFF);
+ } while (link != cursor->curr->link && LF_BACKOFF());
if (!DELETED(link))
{
@@ -145,7 +145,7 @@ retry:
and remove this deleted node
*/
if (my_atomic_casptr((void **) cursor->prev,
- (void **) &cursor->curr, cursor->next) && LF_BACKOFF)
+ (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
lf_alloc_free(pins, cursor->curr);
else
goto retry;
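
PTR() and DELETED() in these hunks implement the usual mark-bit convention
for lock-free lists: the low bit of a link pointer flags the node as
logically deleted. A sketch of that convention (hypothetical definitions;
the exact ones in lf_hash.c may differ):

    #include <stdint.h>

    /* the low bit of a link doubles as the "logically deleted" mark */
    #define DELETED(v)  ((v) & 1)
    #define PTR(v)      ((LF_SLIST *)((v) & ~(uintptr_t) 1))

Because the mark lives in the link word itself, the CAS in the last hunk can
only unlink cursor->curr while the predecessor still points at it; any
concurrent change makes the CAS fail and forces the retry.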
diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c
index 2549bd8a587..6a4139844db 100644
--- a/mysys/waiting_threads.c
+++ b/mysys/waiting_threads.c
@@ -617,7 +617,7 @@ retry:
{
rc= *shared_ptr;
lf_pin(arg->thd->pins, 0, rc);
- } while (rc != *shared_ptr && LF_BACKOFF);
+ } while (rc != *shared_ptr && LF_BACKOFF());
if (rc == 0)
{
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index 05a45a69f33..551e78d24ba 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
#include "univ.i"
#include "ut0ut.h"
+#include "my_cpu.h"
/** Execute a given function exactly once in a multi-threaded environment
or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ public:
ut_error;
}
- UT_RELAX_CPU();
+ MY_RELAX_CPU();
}
}
}
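
Outside of InnoDB's actual os_once class, the loop this hunk touches is a
call-once state machine: threads that lose the race spin on the state word
with MY_RELAX_CPU() until the winner stores DONE. A minimal C11 sketch under
that assumption (not the real implementation):

    #include <stdatomic.h>

    enum { NEVER_DONE, IN_PROGRESS, DONE };
    static _Atomic int once_state= NEVER_DONE;

    static void do_or_wait_sketch(void (*init)(void))
    {
      int expected= NEVER_DONE;
      if (atomic_compare_exchange_strong(&once_state, &expected,
                                         IN_PROGRESS))
      {
        init();                            /* we won the race: run it */
        atomic_store(&once_state, DONE);
      }
      else
        while (atomic_load(&once_state) != DONE)
          MY_RELAX_CPU();                  /* we lost: spin-wait politely */
    }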
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 4e9c2599933..352f5cce83d 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
/** Time stamp */
typedef time_t ib_time_t;
-#ifdef HAVE_PAUSE_INSTRUCTION
- /* According to the gcc info page, asm volatile means that the
- instruction has important side-effects and must not be removed.
- Also asm volatile may trigger a memory barrier (spilling all registers
- to memory). */
-# ifdef __SUNPRO_CC
-# define UT_RELAX_CPU() asm ("pause" )
-# else
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
- volatile int32 volatile_var; \
- int32 oldval= 0; \
- my_atomic_cas32(&volatile_var, &oldval, 1); \
- } while (0)
-#endif
-
#if defined (__GNUC__)
# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
#elif defined (_MSC_VER)
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 28e327a2a77..88c5c889c8d 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -293,7 +293,7 @@ ut_delay(
UT_LOW_PRIORITY_CPU();
for (i = 0; i < delay * 50; i++) {
- UT_RELAX_CPU();
+ MY_RELAX_CPU();
UT_COMPILER_BARRIER();
}
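
ut_delay(n) therefore executes roughly n * 50 relax instructions, with a
compiler barrier on each round so the loop cannot be optimized away. A
hypothetical caller in the spirit of InnoDB's spin-then-wait mutexes
(try_lock and os_wait are stand-ins, and the constants are illustrative):

    #include <stdbool.h>
    #include <stdlib.h>

    static void spin_then_wait(bool (*try_lock)(void), void (*os_wait)(void))
    {
      unsigned round;
      for (round= 0; round < 20; round++)
      {
        if (try_lock())
          return;                   /* acquired while spinning */
        ut_delay(rand() % 5 + 1);   /* randomized pause breaks lockstep */
      }
      os_wait();                    /* spinning failed: block in the OS */
    }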