-rw-r--r-- | include/atomic/generic-msvc.h      | 30
-rw-r--r-- | include/lf.h                       |  1
-rw-r--r-- | include/my_atomic.h                |  9
-rw-r--r-- | include/my_cpu.h                   | 57
-rw-r--r-- | mysys/lf_alloc-pin.c               |  4
-rw-r--r-- | mysys/lf_hash.c                    |  6
-rw-r--r-- | mysys/waiting_threads.c            |  2
-rw-r--r-- | storage/innobase/include/os0once.h |  3
-rw-r--r-- | storage/innobase/include/ut0ut.h   | 29
-rw-r--r-- | storage/innobase/ut/ut0ut.cc       |  2
10 files changed, 67 insertions, 76 deletions
diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h
index 6c5272c98f4..8daa497036f 100644
--- a/include/atomic/generic-msvc.h
+++ b/include/atomic/generic-msvc.h
@@ -90,37 +90,7 @@ C_MODE_END
   ret= 0; /* avoid compiler warning */          \
   ret= IL_COMP_EXCHG ## S (a, ret, ret);
 #endif
-/*
-  my_yield_processor (equivalent of x86 PAUSE instruction) should be used
-  to improve performance on hyperthreaded CPUs. Intel recommends to use it in
-  spin loops also on non-HT machines to reduce power consumption (see e.g
-  http://softwarecommunity.intel.com/articles/eng/2004.htm)
-
-  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
-  and YieldProcessor shows that much better performance is achieved by calling
-  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
-  loop count in the range 200-300 brought best results.
- */
-#ifndef YIELD_LOOPS
-#define YIELD_LOOPS 200
-#endif
-
-static __inline int my_yield_processor()
-{
-  int i;
-  for(i=0; i<YIELD_LOOPS; i++)
-  {
-#if (_MSC_VER <= 1310)
-    /* On older compilers YieldProcessor is not available, use inline assembly*/
-    __asm { rep nop }
-#else
-    YieldProcessor();
-#endif
-  }
-  return 1;
-}
-#define LF_BACKOFF my_yield_processor()
 
 #else /* cleanup */
 
 #undef IL_EXCHG_ADD32
diff --git a/include/lf.h b/include/lf.h
index 1825de62b43..a9d7e9ee688 100644
--- a/include/lf.h
+++ b/include/lf.h
@@ -17,6 +17,7 @@
 #define INCLUDE_LF_INCLUDED
 
 #include <my_atomic.h>
+#include <my_cpu.h>
 
 C_MODE_START
 
diff --git a/include/my_atomic.h b/include/my_atomic.h
index 8f13a0ab89b..c6abcda2d62 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -346,15 +346,6 @@ make_atomic_store(ptr)
 #undef make_atomic_fas_body
 #undef intptr
 
-/*
-  the macro below defines (as an expression) the code that
-  will be run in spin-loops. Intel manuals recummend to have PAUSE there.
-  It is expected to be defined in include/atomic/ *.h files
-*/
-#ifndef LF_BACKOFF
-#define LF_BACKOFF (1)
-#endif
-
 #define MY_ATOMIC_OK 0
 #define MY_ATOMIC_NOT_1CPU 1
 extern int my_atomic_initialize();
diff --git a/include/my_cpu.h b/include/my_cpu.h
index 026b92c1b74..856a8e9b04a 100644
--- a/include/my_cpu.h
+++ b/include/my_cpu.h
@@ -1,3 +1,5 @@
+#ifndef MY_CPU_INCLUDED
+#define MY_CPU_INCLUDED
 /* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
 
    This program is free software; you can redistribute it and/or modify
@@ -42,3 +44,58 @@
 #define HMT_medium_high()
 #define HMT_high()
 #endif
+
+
+static inline void MY_RELAX_CPU(void)
+{
+#ifdef HAVE_PAUSE_INSTRUCTION
+  /*
+    According to the gcc info page, asm volatile means that the
+    instruction has important side-effects and must not be removed.
+    Also asm volatile may trigger a memory barrier (spilling all registers
+    to memory).
+  */
+#ifdef __SUNPRO_CC
+  asm ("pause" );
+#else
+  __asm__ __volatile__ ("pause");
+#endif
+
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+  __asm__ __volatile__ ("rep; nop");
+#elif defined _WIN32
+  /*
+    In the Win32 API, the x86 PAUSE instruction is executed by calling
+    the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+    independent way by using YieldProcessor.
+  */
+  YieldProcessor();
+#elif defined(_ARCH_PWR8)
+  __ppc_get_timebase();
+#else
+  int32 var, oldval= 0;
+  my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
+                                  MY_MEMORY_ORDER_RELAXED);
+#endif
+}
+
+
+/*
+  LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
+  recommends to use it in spin loops also on non-HT machines to reduce power
+  consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
+
+  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
+  and YieldProcessor shows that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
+  loop count in the range 200-300 brought best results.
+*/
+
+static inline int LF_BACKOFF(void)
+{
+  int i;
+  for (i= 0; i < 200; i++)
+    MY_RELAX_CPU();
+  return 1;
+}
+#endif
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index bf2b8a12846..8a96fccf16a 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
   {
     anext_node(last)= tmp.node;
   } while (!my_atomic_casptr((void **)(char *)&allocator->top,
-                             (void **)&tmp.ptr, first) && LF_BACKOFF);
+                             (void **)&tmp.ptr, first) && LF_BACKOFF());
 }
 
 /*
@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
     {
       node= allocator->top;
       lf_pin(pins, 0, node);
-    } while (node != allocator->top && LF_BACKOFF);
+    } while (node != allocator->top && LF_BACKOFF());
     if (!node)
     {
       node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
index 430f1007f30..6b3fa78475d 100644
--- a/mysys/lf_hash.c
+++ b/mysys/lf_hash.c
@@ -102,7 +102,7 @@ retry:
   do { /* PTR() isn't necessary below, head is a dummy node */
     cursor->curr= (LF_SLIST *)(*cursor->prev);
     lf_pin(pins, 1, cursor->curr);
-  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
   for (;;)
   {
@@ -117,7 +117,7 @@ retry:
       link= cursor->curr->link;
       cursor->next= PTR(link);
       lf_pin(pins, 0, cursor->next);
-    } while (link != cursor->curr->link && LF_BACKOFF);
+    } while (link != cursor->curr->link && LF_BACKOFF());
 
     if (!DELETED(link))
     {
@@ -145,7 +145,7 @@ retry:
          and remove this deleted node
        */
        if (my_atomic_casptr((void **) cursor->prev,
-                            (void **) &cursor->curr, cursor->next) && LF_BACKOFF)
+                            (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
         lf_alloc_free(pins, cursor->curr);
       else
         goto retry;
diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c
index 2549bd8a587..6a4139844db 100644
--- a/mysys/waiting_threads.c
+++ b/mysys/waiting_threads.c
@@ -617,7 +617,7 @@ retry:
   {
     rc= *shared_ptr;
     lf_pin(arg->thd->pins, 0, rc);
-  } while (rc != *shared_ptr && LF_BACKOFF);
+  } while (rc != *shared_ptr && LF_BACKOFF());
 
   if (rc == 0)
   {
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index 05a45a69f33..551e78d24ba 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
 
 #include "univ.i"
 #include "ut0ut.h"
+#include "my_cpu.h"
 
 /** Execute a given function exactly once in a multi-threaded environment
 or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ public:
 				ut_error;
 			}
 
-			UT_RELAX_CPU();
+			MY_RELAX_CPU();
 		}
 	}
 }
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 4e9c2599933..352f5cce83d 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
 /** Time stamp */
 typedef time_t ib_time_t;
 
-#ifdef HAVE_PAUSE_INSTRUCTION
-	/* According to the gcc info page, asm volatile means that the
-	instruction has important side-effects and must not be removed.
-	Also asm volatile may trigger a memory barrier (spilling all registers
-	to memory). */
-# ifdef __SUNPRO_CC
-#  define UT_RELAX_CPU() asm ("pause" )
-# else
-#  define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
-	/* In the Win32 API, the x86 PAUSE instruction is executed by calling
-	the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
-	independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
-	volatile int32 volatile_var; \
-	int32 oldval= 0; \
-	my_atomic_cas32(&volatile_var, &oldval, 1); \
-	} while (0)
-#endif
-
 #if defined (__GNUC__)
 # define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
 #elif defined (_MSC_VER)
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 28e327a2a77..88c5c889c8d 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -293,7 +293,7 @@ ut_delay(
 	UT_LOW_PRIORITY_CPU();
 
 	for (i = 0; i < delay * 50; i++) {
-		UT_RELAX_CPU();
+		MY_RELAX_CPU();
 		UT_COMPILER_BARRIER();
 	}
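For readers unfamiliar with the pattern this patch centralizes: both MY_RELAX_CPU() and LF_BACKOFF() exist to be called inside busy-wait loops, so that a spinning hyper-thread briefly yields execution resources to its sibling instead of hammering the cache line it is waiting on. The sketch below illustrates that usage in a minimal test-and-test-and-set spin lock. It is illustrative only, written against C11 atomics rather than the server's my_atomic/my_cpu API; the names spin_lock_t, spin_acquire(), spin_release() and relax_cpu() are hypothetical and do not appear in the patch, where the equivalent role is played by MY_RELAX_CPU() and LF_BACKOFF() from include/my_cpu.h.

/*
  Minimal sketch of a test-and-test-and-set spin lock using a PAUSE-style
  relax primitive in the busy-wait loop.  Hypothetical names; not part of
  the MariaDB patch above.
*/
#include <stdatomic.h>

typedef struct { atomic_int locked; } spin_lock_t;

static inline void relax_cpu(void)
{
#if defined(__x86_64__) || defined(__i386__)
  /* same idea as MY_RELAX_CPU(): hint the CPU that we are spinning */
  __asm__ __volatile__ ("pause");
#else
  /* fallback: a plain compiler barrier on other architectures */
  __asm__ __volatile__ ("" ::: "memory");
#endif
}

static inline void spin_acquire(spin_lock_t *lock)
{
  for (;;)
  {
    /* try to take the lock with a single atomic exchange */
    if (!atomic_exchange_explicit(&lock->locked, 1, memory_order_acquire))
      return;
    /* lock is held: spin on a plain load, relaxing the CPU each round,
       so the waiting hyper-thread does not starve its sibling */
    while (atomic_load_explicit(&lock->locked, memory_order_relaxed))
      relax_cpu();
  }
}

static inline void spin_release(spin_lock_t *lock)
{
  atomic_store_explicit(&lock->locked, 0, memory_order_release);
}

Note the design choice the old generic-msvc.h comment describes and the new LF_BACKOFF() keeps: rather than issuing a single PAUSE per retry, LF_BACKOFF() runs the relax instruction in a loop of 200 iterations, yielding for longer between retries, which the original benchmarking found performed best on Intel hardware. It also returns 1 so it can sit as the last operand of the && chains in the lf_alloc/lf_hash retry conditions without changing their truth value.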