summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladislav Vaintroub <wlad@sol>2009-11-30 01:49:26 +0000
committerVladislav Vaintroub <wlad@sol>2009-11-30 01:49:26 +0000
commit43770e8f3f3fd22259b330756fad72737b079263 (patch)
treea9ea0d12503dfc16738cc769a0e6879ad6daa702
parent50d56f0906e5ebbf3a53844ed13c101c3d9cf790 (diff)
parent3c11750e363da6e1017d5bc86e9e7e03b2c4e101 (diff)
downloadmariadb-git-43770e8f3f3fd22259b330756fad72737b079263.tar.gz
merge
-rw-r--r--configure.cmake26
-rw-r--r--configure.in62
-rw-r--r--include/Makefile.am1
-rw-r--r--include/my_rdtsc.h129
-rwxr-xr-xmysys/CMakeLists.txt11
-rw-r--r--mysys/Makefile.am10
-rw-r--r--mysys/my_rdtsc.c1004
-rw-r--r--mysys/my_timer_cycles.il38
-rw-r--r--unittest/mysys/CMakeLists.txt2
-rw-r--r--unittest/mysys/Makefile.am7
-rw-r--r--unittest/mysys/my_rdtsc-t.c229
11 files changed, 1508 insertions, 11 deletions
diff --git a/configure.cmake b/configure.cmake
index efc94f09244..d16e19166d5 100644
--- a/configure.cmake
+++ b/configure.cmake
@@ -627,6 +627,32 @@ CHECK_FUNCTION_EXISTS_UNIX (memalign HAVE_MEMALIGN)
CHECK_FUNCTION_EXISTS_UNIX (chown HAVE_CHOWN)
CHECK_FUNCTION_EXISTS_UNIX (nl_langinfo HAVE_NL_LANGINFO)
+#--------------------------------------------------------------------
+# Support for WL#2373 (Use cycle counter for timing)
+#--------------------------------------------------------------------
+
+CHECK_INCLUDE_FILES_UNIX(time.h HAVE_TIME_H)
+CHECK_INCLUDE_FILES_UNIX(sys/time.h HAVE_SYS_TIME_H)
+CHECK_INCLUDE_FILES_UNIX(sys/times.h HAVE_SYS_TIMES_H)
+CHECK_INCLUDE_FILES_UNIX(asm/msr.h HAVE_ASM_MSR_H)
+#msr.h has rdtscll()
+
+CHECK_INCLUDE_FILES_UNIX(ia64intrin.h HAVE_IA64INTRIN_H)
+
+CHECK_FUNCTION_EXISTS_UNIX(times HAVE_TIMES)
+CHECK_FUNCTION_EXISTS_UNIX(gettimeofday HAVE_GETTIMEOFDAY)
+CHECK_FUNCTION_EXISTS_UNIX(read_real_time HAVE_READ_REAL_TIME)
+# This should work on AIX.
+
+CHECK_FUNCTION_EXISTS_UNIX(ftime HAVE_FTIME)
+# This is still a normal call for milliseconds.
+
+CHECK_FUNCTION_EXISTS_UNIX(time HAVE_TIME)
+# We can use time() on Macintosh if there is no ftime().
+
+CHECK_FUNCTION_EXISTS_UNIX(rdtscll HAVE_RDTSCLL)
+# I doubt that we'll ever reach the check for this.
+
#
# Tests for symbols
diff --git a/configure.in b/configure.in
index 11448bda3c3..818281c33ff 100644
--- a/configure.in
+++ b/configure.in
@@ -1,6 +1,21 @@
dnl -*- ksh -*-
dnl Process this file with autoconf to produce a configure script.
+# Copyright (C) 2008-2009 Sun Microsystems, Inc
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
AC_PREREQ(2.52)dnl Minimum Autoconf version required.
AC_INIT(sql/mysqld.cc)
@@ -2914,7 +2929,54 @@ case $SYSTEM_TYPE in
esac
AC_SUBST(MAKE_BINARY_DISTRIBUTION_OPTIONS)
+#--------------------------------------------------------------------
+# Support for WL#2373 (Use cycle counter for timing)
+#--------------------------------------------------------------------
+
+AC_CHECK_HEADERS(time.h)
+AC_CHECK_HEADERS(sys/time.h)
+AC_CHECK_HEADERS(sys/times.h)
+AC_CHECK_HEADERS(asm/msr.h)
+#msr.h has rdtscll()
+
+AC_CHECK_HEADERS(ia64intrin.h)
+
+AC_CHECK_FUNCS(times)
+AC_CHECK_FUNCS(gettimeofday)
+AC_CHECK_FUNCS(read_real_time)
+# This should work on AIX.
+
+AC_CHECK_FUNCS(ftime)
+# This is still a normal call for milliseconds.
+
+AC_CHECK_FUNCS(time)
+# We can use time() on Macintosh if there is no ftime().
+
+AC_CHECK_FUNCS(rdtscll)
+# I doubt that we'll ever reach the check for this.
+
+# When compiling with Sun Studio C / C++ we need to include
+# my_timer_cycles.il, an "inline templates" separate file,
+# on the command line. It has assembly code, "rd %tick" for
+# SPARC or "rdtsc" for x86.
+RDTSC_SPARC_ASSEMBLY=""
+case $CC_VERSION in
+ *Sun*C*)
+ RDTSC_SPARC_ASSEMBLY="my_timer_cycles.il"
+ ;;
+esac
+case $CXX_VERSION in
+ *Sun*C++*)
+ RDTSC_SPARC_ASSEMBLY="my_timer_cycles.il"
+ ;;
+esac
+
+AC_SUBST([RDTSC_SPARC_ASSEMBLY])
+
+#--------------------------------------------------------------------
# Output results
+#--------------------------------------------------------------------
+
if test -d "$srcdir/pstack" ; then
AC_CONFIG_FILES(pstack/Makefile pstack/aout/Makefile)
fi
diff --git a/include/Makefile.am b/include/Makefile.am
index acbc1086372..3d866961389 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -38,6 +38,7 @@ noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \
my_aes.h my_tree.h my_trie.h hash.h thr_alarm.h \
thr_lock.h t_ctype.h violite.h my_md5.h base64.h \
my_handler.h my_time.h service_versions.h \
+ my_rdtsc.h \
my_vle.h my_user.h my_atomic.h atomic/nolock.h \
atomic/rwlock.h atomic/x86-gcc.h atomic/generic-msvc.h \
atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h \
diff --git a/include/my_rdtsc.h b/include/my_rdtsc.h
new file mode 100644
index 00000000000..81bc1aafb58
--- /dev/null
+++ b/include/my_rdtsc.h
@@ -0,0 +1,129 @@
+/* Copyright (C) 2008, 2009 Sun Microsystems, Inc
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ rdtsc3 -- multi-platform timer code
+ pgulutzan@mysql.com, 2005-08-29
+ modified 2008-11-02
+*/
+
+#ifndef MY_RDTSC_H
+#define MY_RDTSC_H
+
+/**
+ Characteristics of a timer.
+*/
+struct my_timer_unit_info
+{
+ /** Routine used for the timer. */
+ ulonglong routine;
+ /** Overhead of the timer. */
+ ulonglong overhead;
+ /** Frequency of the timer. */
+ ulonglong frequency;
+ /** Resolution of the timer. */
+ ulonglong resolution;
+};
+
+/**
+ Characteristics of all the supported timers.
+ @sa my_timer_init().
+*/
+struct my_timer_info
+{
+ /** Characteristics of the cycle timer. */
+ struct my_timer_unit_info cycles;
+ /** Characteristics of the nanosecond timer. */
+ struct my_timer_unit_info nanoseconds;
+ /** Characteristics of the microsecond timer. */
+ struct my_timer_unit_info microseconds;
+ /** Characteristics of the millisecond timer. */
+ struct my_timer_unit_info milliseconds;
+ /** Characteristics of the tick timer. */
+ struct my_timer_unit_info ticks;
+};
+
+typedef struct my_timer_info MY_TIMER_INFO;
+
+C_MODE_START
+
+/**
+ A cycle timer.
+ @return the current timer value, in cycles.
+*/
+ulonglong my_timer_cycles(void);
+
+/**
+ A namoseconds timer.
+ @return the current timer value, in nanoseconds.
+*/
+ulonglong my_timer_nanoseconds(void);
+
+/**
+ A microseconds timer.
+ @return the current timer value, in microseconds.
+*/
+ulonglong my_timer_microseconds(void);
+
+/**
+ A millisecond timer.
+ @return the current timer value, in milliseconds.
+*/
+ulonglong my_timer_milliseconds(void);
+
+/**
+ A ticks timer.
+ @return the current timer value, in ticks.
+*/
+ulonglong my_timer_ticks(void);
+
+/**
+ Timer initialization function.
+ @param [out] mti the timer characteristics.
+*/
+void my_timer_init(MY_TIMER_INFO *mti);
+
+C_MODE_END
+
+#define MY_TIMER_ROUTINE_ASM_X86 1
+#define MY_TIMER_ROUTINE_ASM_X86_64 2
+#define MY_TIMER_ROUTINE_RDTSCLL 3
+#define MY_TIMER_ROUTINE_ASM_X86_WIN 4
+#define MY_TIMER_ROUTINE_RDTSC 5
+#define MY_TIMER_ROUTINE_ASM_IA64 6
+#define MY_TIMER_ROUTINE_ASM_PPC 7
+#define MY_TIMER_ROUTINE_SGI_CYCLE 8
+#define MY_TIMER_ROUTINE_GETHRTIME 9
+#define MY_TIMER_ROUTINE_READ_REAL_TIME 10
+#define MY_TIMER_ROUTINE_CLOCK_GETTIME 11
+#define MY_TIMER_ROUTINE_NXGETTIME 12
+#define MY_TIMER_ROUTINE_GETTIMEOFDAY 13
+#define MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER 14
+#define MY_TIMER_ROUTINE_GETTICKCOUNT 15
+#define MY_TIMER_ROUTINE_TIME 16
+#define MY_TIMER_ROUTINE_TIMES 17
+#define MY_TIMER_ROUTINE_FTIME 18
+#define MY_TIMER_ROUTINE_ASM_PPC64 19
+#define MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64 20
+#define MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32 21
+#define MY_TIMER_ROUTINE_ASM_SUNPRO_I386 22
+#define MY_TIMER_ROUTINE_ASM_GCC_SPARC64 23
+#define MY_TIMER_ROUTINE_ASM_GCC_SPARC32 24
+#define MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME 25
+#define MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME 26
+#define MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64 27
+
+#endif
+
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index b9ecd4bf83a..b6170170be1 100755
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2006 MySQL AB
+# Copyright (C) 2006 MySQL AB, 2009 Sun Microsystems, Inc
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -35,7 +35,8 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_
rijndael.c safemalloc.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c
thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c
lf_alloc-pin.c lf_dynarray.c lf_hash.c
- my_atomic.c my_getncpus.c)
+ my_atomic.c my_getncpus.c
+ my_rdtsc.c)
IF (WIN32)
SET (MYSYS_SOURCES ${MYSYS_SOURCES} my_winthread.c my_wincond.c my_winerr.c my_winfile.c my_windac.c my_conio.c)
@@ -47,6 +48,12 @@ IF(CMAKE_COMPILER_IS_GNUCC AND NOT HAVE_CXX_NEW)
ADD_DEFINITIONS( -DUSE_MYSYS_NEW)
ENDIF()
+IF(CMAKE_C_COMPILER MATCHES SunPro)
+ # Inline assembly template for rdtsc
+ SET_SOURCE_FILE_PROPERTIES(my_rdtsc.c
+ PROPERTIES COMPILE_FLAGS ${CMAKE_CURRENT_SOURCE_DIR}/my_timer_cycles.il)
+ENDIF()
+
IF(HAVE_LARGE_PAGES)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} my_largepage.c)
ENDIF()
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index 3b53fc1dd59..476b63d3ad5 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -16,7 +16,8 @@
MYSQLDATAdir = $(localstatedir)
MYSQLSHAREdir = $(pkgdatadir)
MYSQLBASEdir= $(prefix)
-INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+INCLUDES = @ZLIB_INCLUDES@ @RDTSC_SPARC_ASSEMBLY@ \
+ -I$(top_builddir)/include \
-I$(top_srcdir)/include -I$(srcdir)
pkglib_LIBRARIES = libmysys.a
LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a $(top_builddir)/dbug/libdbug.a
@@ -53,7 +54,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
my_gethostbyname.c rijndael.c my_aes.c sha1.c \
my_handler.c my_netware.c my_largepage.c \
my_memmem.c stacktrace.c \
- my_windac.c my_access.c base64.c my_libwrap.c
+ my_windac.c my_access.c base64.c my_libwrap.c \
+ my_rdtsc.c
if NEED_THREAD
# mf_keycache is used only in the server, so it is safe to leave the file
@@ -65,7 +67,9 @@ endif
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
thr_mutex.c thr_rwlock.c \
CMakeLists.txt mf_soundex.c \
- my_conio.c my_wincond.c my_winthread.c my_winerr.c my_winfile.c
+ my_conio.c my_wincond.c my_winthread.c my_winerr.c \
+ my_winfile.c \
+ my_timer_cycles.il
libmysys_a_LIBADD = @THREAD_LOBJECTS@
# test_dir_DEPENDENCIES= $(LIBRARIES)
# testhash_DEPENDENCIES= $(LIBRARIES)
diff --git a/mysys/my_rdtsc.c b/mysys/my_rdtsc.c
new file mode 100644
index 00000000000..4227498b92c
--- /dev/null
+++ b/mysys/my_rdtsc.c
@@ -0,0 +1,1004 @@
+/* Copyright (C) 2008, 2009 Sun Microsystems, Inc
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ rdtsc3 -- multi-platform timer code
+ pgulutzan@mysql.com, 2005-08-29
+ modified 2008-11-02
+
+ Functions:
+
+ my_timer_cycles ulonglong cycles
+ my_timer_nanoseconds ulonglong nanoseconds
+ my_timer_microseconds ulonglong "microseconds"
+ my_timer_milliseconds ulonglong milliseconds
+ my_timer_ticks ulonglong ticks
+ my_timer_init initialization / test
+
+ We'll call the first 5 functions (the ones that return
+ a ulonglong) "my_timer_xxx" functions.
+ Each my_timer_xxx function returns a 64-bit timing value
+ since an arbitrary 'epoch' start. Since the only purpose
+ is to determine elapsed times, wall-clock time-of-day
+ is not known and not relevant.
+
+ The my_timer_init function is necessary for initializing.
+ It returns information (underlying routine name,
+ frequency, resolution, overhead) about all my_timer_xxx
+ functions. A program should call my_timer_init once,
+ use the information to decide what my_timer_xxx function
+ to use, and subsequently call that function by function
+ pointer.
+
+ A typical use would be:
+ my_timer_init() ... once, at program start
+ ...
+ time1= my_timer_xxx() ... time before start
+ [code that's timed]
+ time2= my_timer_xxx() ... time after end
+ elapsed_time= (time2 - time1) - overhead
+*/
+
+#include "my_global.h"
+#include "my_rdtsc.h"
+
+#if defined(_WIN32)
+#include <stdio.h>
+#include "windows.h"
+#else
+#include <stdio.h>
+#endif
+
+#if !defined(_WIN32)
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h> /* for clock_gettime */
+#else
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#elif defined(HAVE_TIME_H)
+#include <time.h>
+#endif
+#endif
+#endif
+
+#if defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
+#include <asm/msr.h> /* for rdtscll */
+#endif
+
+#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
+#include <sys/timeb.h> /* for ftime */
+#endif
+
+#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
+#include <sys/times.h> /* for times */
+#endif
+
+#if defined(__NETWARE__)
+#include <nks/time.h> /* for NXGetTime */
+#endif
+
+#if defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
+#include <ia64intrin.h> /* for __GetReg */
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__)
+#include <mach/mach_time.h>
+#endif
+
+#if defined(__SUNPRO_CC) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
+extern "C" ulonglong my_timer_cycles_il_sparc64();
+#elif defined(__SUNPRO_CC) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
+extern "C" ulonglong my_timer_cycles_il_sparc32();
+#elif defined(__SUNPRO_CC) && defined(__i386) && defined(_ILP32)
+extern "C" ulonglong my_timer_cycles_il_i386();
+#elif defined(__SUNPRO_CC) && defined(__x86_64) && defined(_LP64)
+extern "C" ulonglong my_timer_cycles_il_x86_64();
+#elif defined(__SUNPRO_C) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
+ulonglong my_timer_cycles_il_sparc64();
+#elif defined(__SUNPRO_C) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
+ulonglong my_timer_cycles_il_sparc32();
+#elif defined(__SUNPRO_C) && defined(__i386) && defined(_ILP32)
+ulonglong my_timer_cycles_il_i386();
+#elif defined(__SUNPRO_C) && defined(__x86_64) && defined(_LP64)
+ulonglong my_timer_cycles_il_x86_64();
+#endif
+
+#if defined(__INTEL_COMPILER)
+/*
+ icc warning #1011 is:
+ missing return statement at end of non-void function
+*/
+#pragma warning (disable:1011)
+#endif
+
+/*
+ For cycles, we depend on RDTSC for x86 platforms,
+ or on time buffer (which is not really a cycle count
+ but a separate counter with less than nanosecond
+ resolution) for most PowerPC platforms, or on
+ gethrtime which is okay for hpux and solaris, or on
+ clock_gettime(CLOCK_SGI_CYCLE) for Irix platforms,
+ or on read_real_time for aix platforms. There is
+ nothing for Alpha platforms, they would be tricky.
+*/
+
+ulonglong my_timer_cycles(void)
+{
+#if defined(__GNUC__) && defined(__i386__)
+ /* This works much better if compiled with "gcc -O3". */
+ ulonglong result;
+ __asm__ __volatile__ ("rdtsc" : "=A" (result));
+ return result;
+#elif defined(__SUNPRO_C) && defined(__i386)
+ __asm("rdtsc");
+#elif defined(__GNUC__) && defined(__x86_64__)
+ ulonglong result;
+ __asm__ __volatile__ ("rdtsc\n\t" \
+ "shlq $32,%%rdx\n\t" \
+ "orq %%rdx,%%rax"
+ : "=a" (result) :: "%edx");
+ return result;
+#elif defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
+ {
+ ulonglong result;
+ rdtscll(result);
+ return result;
+ }
+#elif defined(_WIN32) && defined(_M_IX86)
+ __asm {rdtsc};
+#elif defined(_WIN64) && defined(_M_X64)
+ /* For 64-bit Windows: unsigned __int64 __rdtsc(); */
+ return __rdtsc();
+#elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
+ return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
+#elif defined(__GNUC__) && defined(__ia64__)
+ {
+ ulonglong result;
+ __asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
+ return result;
+ }
+#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (defined(__64BIT__) || defined(_ARCH_PPC64))
+ {
+ ulonglong result;
+ __asm __volatile__ ("mftb %0" : "=r" (result));
+ return result;
+ }
+#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
+ {
+ /*
+ mftbu means "move from time-buffer-upper to result".
+ The loop is saying: x1=upper, x2=lower, x3=upper,
+ if x1!=x3 there was an overflow so repeat.
+ */
+ unsigned int x1, x2, x3;
+ ulonglong result;
+ for (;;)
+ {
+ __asm __volatile__ ( "mftbu %0" : "=r"(x1) );
+ __asm __volatile__ ( "mftb %0" : "=r"(x2) );
+ __asm __volatile__ ( "mftbu %0" : "=r"(x3) );
+ if (x1 == x3) break;
+ }
+ result = x1;
+ return ( result << 32 ) | x2;
+ }
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
+ return (my_timer_cycles_il_sparc64());
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
+ return (my_timer_cycles_il_sparc32());
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
+ /* This is probably redundant for __SUNPRO_C. */
+ return (my_timer_cycles_il_i386());
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
+ return (my_timer_cycles_il_x86_64());
+#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64) && (__GNUC__>2)
+ {
+ ulonglong result;
+ __asm __volatile__ ("rd %%tick,%0" : "=r" (result));
+ return result;
+ }
+#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64) && (__GNUC__>2)
+ {
+ union {
+ ulonglong wholeresult;
+ struct {
+ ulong high;
+ ulong low;
+ } splitresult;
+ } result;
+ __asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
+ return result.wholeresult;
+ }
+#elif defined(__sgi) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
+ {
+ struct timespec tp;
+ clock_gettime(CLOCK_SGI_CYCLE, &tp);
+ return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
+ }
+#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
+ /* gethrtime may appear as either cycle or nanosecond counter */
+ return (ulonglong) gethrtime();
+#else
+ return 0;
+#endif
+}
+
+#if defined(__INTEL_COMPILER)
+/* re-enable warning#1011 which was only for my_timer_cycles() */
+/* There may be an icc bug which means we must leave disabled. */
+#pragma warning (default:1011)
+#endif
+
+/*
+ For nanoseconds, most platforms have nothing available that
+ (a) doesn't require bringing in a 40-kb librt.so library
+ (b) really has nanosecond resolution.
+*/
+
+ulonglong my_timer_nanoseconds(void)
+{
+#if defined(HAVE_READ_REAL_TIME)
+ {
+ timebasestruct_t tr;
+ read_real_time(&tr, TIMEBASE_SZ);
+ return (ulonglong) tr.tb_high * 1000000000 + (ulonglong) tr.tb_low;
+ }
+#elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_REALTIME)
+ {
+ struct timespec tp;
+ clock_gettime(CLOCK_REALTIME, &tp);
+ return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
+ }
+#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
+ /* SunOS 5.10+, Solaris, HP-UX: hrtime_t gethrtime(void) */
+ return (ulonglong) gethrtime();
+#elif defined(__NETWARE__)
+ {
+ NXTime_t tm;
+ NXGetTime(NX_SINCE_1970, NX_NSECONDS, &tm);
+ return (ulonglong) tm;
+ }
+#elif defined(__APPLE__) && defined(__MACH__)
+ {
+ ulonglong tm;
+ static mach_timebase_info_data_t timebase_info= {0,0};
+ if (timebase_info.denom == 0)
+ (void) mach_timebase_info(&timebase_info);
+ tm= mach_absolute_time();
+ return (tm * timebase_info.numer) / timebase_info.denom;
+ }
+#else
+ return 0;
+#endif
+}
+
+/*
+ For microseconds, gettimeofday() is available on
+ almost all platforms. On Windows we use
+ QueryPerformanceCounter which will usually tick over
+ 3.5 million times per second, and we don't throw
+ away the extra precision. (On Windows Server 2003
+ the frequency is same as the cycle frequency.)
+*/
+
+ulonglong my_timer_microseconds(void)
+{
+#if defined(HAVE_GETTIMEOFDAY)
+ {
+ static ulonglong last_value= 0;
+ struct timeval tv;
+ if (gettimeofday(&tv, NULL) == 0)
+ last_value= (ulonglong) tv.tv_sec * 1000000 + (ulonglong) tv.tv_usec;
+ else
+ {
+ /*
+ There are reports that gettimeofday(2) can have intermittent failures
+ on some platform, see for example Bug#36819.
+ We are not trying again or looping, just returning the best value possible
+ under the circumstances ...
+ */
+ last_value++;
+ }
+ return last_value;
+ }
+#elif defined(_WIN32)
+ {
+ /* QueryPerformanceCounter usually works with about 1/3 microsecond. */
+ LARGE_INTEGER t_cnt;
+
+ QueryPerformanceCounter(&t_cnt);
+ return (ulonglong) t_cnt.QuadPart;
+ }
+#elif defined(__NETWARE__)
+ {
+ NXTime_t tm;
+ NXGetTime(NX_SINCE_1970, NX_USECONDS, &tm);
+ return (ulonglong) tm;
+ }
+#else
+ return 0;
+#endif
+}
+
+/*
+ For milliseconds, we use ftime() if it's supported
+ or time()*1000 if it's not. With modern versions of
+ Windows and with HP Itanium, resolution is 10-15
+ milliseconds.
+*/
+
+ulonglong my_timer_milliseconds(void)
+{
+#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
+ /* ftime() is obsolete but maybe the platform is old */
+ struct timeb ft;
+ ftime(&ft);
+ return (ulonglong)ft.time * 1000 + (ulonglong)ft.millitm;
+#elif defined(HAVE_TIME)
+ return (ulonglong) time(NULL) * 1000;
+#elif defined(_WIN32)
+ FILETIME ft;
+ GetSystemTimeAsFileTime( &ft );
+ return ((ulonglong)ft.dwLowDateTime +
+ (((ulonglong)ft.dwHighDateTime) << 32))/10000;
+#elif defined(__NETWARE__)
+ {
+ NXTime_t tm;
+ NXGetTime(NX_SINCE_1970, NX_MSECONDS, &tm);
+ return (ulonglong)tm;
+ }
+#else
+ return 0;
+#endif
+}
+
+/*
+ For ticks, which we handle with times(), the frequency
+ is usually 100/second and the overhead is surprisingly
+ bad, sometimes even worse than gettimeofday's overhead.
+*/
+
+ulonglong my_timer_ticks(void)
+{
+#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
+ {
+ struct tms times_buf;
+ return (ulonglong) times(&times_buf);
+ }
+#elif defined(__NETWARE__)
+ {
+ NXTime_t tm;
+ NXGetTime(NX_SINCE_BOOT, NX_TICKS, &tm);
+ return (ulonglong) tm;
+ }
+#elif defined(_WIN32)
+ return (ulonglong) GetTickCount();
+#else
+ return 0;
+#endif
+}
+
+/*
+ The my_timer_init() function and its sub-functions
+ have several loops which call timers. If there's
+ something wrong with a timer -- which has never
+ happened in tests -- we want the loop to end after
+ an arbitrary number of iterations, and my_timer_info
+ will show a discouraging result. The arbitrary
+ number is 1,000,000.
+*/
+#define MY_TIMER_ITERATIONS 1000000
+
+/*
+ Calculate overhead. Called from my_timer_init().
+ Usually best_timer_overhead = cycles.overhead or
+ nanoseconds.overhead, so returned amount is in
+ cycles or nanoseconds. We repeat the calculation
+ ten times, so that we can disregard effects of
+ caching or interrupts. Result is quite consistent
+ for cycles, at least. But remember it's a minimum.
+*/
+
+static void my_timer_init_overhead(ulonglong *overhead,
+ ulonglong (*cycle_timer)(void),
+ ulonglong (*this_timer)(void),
+ ulonglong best_timer_overhead)
+{
+ ulonglong time1, time2;
+ int i;
+
+ /* *overhead, least of 20 calculations - cycles.overhead */
+ for (i= 0, *overhead= 1000000000; i < 20; ++i)
+ {
+ time1= cycle_timer();
+ this_timer(); /* rather than 'time_tmp= timer();' */
+ time2= cycle_timer() - time1;
+ if (*overhead > time2)
+ *overhead= time2;
+ }
+ *overhead-= best_timer_overhead;
+}
+
+/*
+ Calculate Resolution. Called from my_timer_init().
+ If a timer goes up by jumps, e.g. 1050, 1075, 1100, ...
+ then the best resolution is the minimum jump, e.g. 25.
+ If it's always divisible by 1000 then it's just a
+ result of multiplication of a lower-precision timer
+ result, e.g. nanoseconds are often microseconds * 1000.
+ If the minimum jump is less than an arbitrary passed
+ figure (a guess based on maximum overhead * 2), ignore.
+ Usually we end up with nanoseconds = 1 because it's too
+ hard to detect anything <= 100 nanoseconds.
+ Often GetTickCount() has resolution = 15.
+ We don't check with ticks because they take too long.
+*/
+static ulonglong my_timer_init_resolution(ulonglong (*this_timer)(void),
+ ulonglong overhead_times_2)
+{
+ ulonglong time1, time2;
+ ulonglong best_jump;
+ int i, jumps, divisible_by_1000, divisible_by_1000000;
+
+ divisible_by_1000= divisible_by_1000000= 0;
+ best_jump= 1000000;
+ for (i= jumps= 0; jumps < 3 && i < MY_TIMER_ITERATIONS * 10; ++i)
+ {
+ time1= this_timer();
+ time2= this_timer();
+ time2-= time1;
+ if (time2)
+ {
+ ++jumps;
+ if (!(time2 % 1000))
+ {
+ ++divisible_by_1000;
+ if (!(time2 % 1000000))
+ ++divisible_by_1000000;
+ }
+ if (best_jump > time2)
+ best_jump= time2;
+ /* For milliseconds, one jump is enough. */
+ if (overhead_times_2 == 0)
+ break;
+ }
+ }
+ if (jumps == 3)
+ {
+ if (jumps == divisible_by_1000000)
+ return 1000000;
+ if (jumps == divisible_by_1000)
+ return 1000;
+ }
+ if (best_jump > overhead_times_2)
+ return best_jump;
+ return 1;
+}
+
+/*
+ Calculate cycle frequency by seeing how many cycles pass
+ in a 200-microsecond period. I tried with 10-microsecond
+ periods originally, and the result was often very wrong.
+*/
+
+static ulonglong my_timer_init_frequency(MY_TIMER_INFO *mti)
+{
+ int i;
+ ulonglong time1, time2, time3, time4;
+ time1= my_timer_cycles();
+ time2= my_timer_microseconds();
+ time3= time2; /* Avoids a Microsoft/IBM compiler warning */
+ for (i= 0; i < MY_TIMER_ITERATIONS; ++i)
+ {
+ time3= my_timer_microseconds();
+ if (time3 - time2 > 200) break;
+ }
+ time4= my_timer_cycles() - mti->cycles.overhead;
+ time4-= mti->microseconds.overhead;
+ return (mti->microseconds.frequency * (time4 - time1)) / (time3 - time2);
+}
+
+/*
+ Call my_timer_init before the first call to my_timer_xxx().
+ If something must be initialized, it happens here.
+ Set: what routine is being used e.g. "asm_x86"
+ Set: function, overhead, actual frequency, resolution.
+*/
+
+void my_timer_init(MY_TIMER_INFO *mti)
+{
+ ulonglong (*best_timer)(void);
+ ulonglong best_timer_overhead;
+ ulonglong time1, time2;
+ int i;
+
+ /* cycles */
+ mti->cycles.frequency= 1000000000;
+#if defined(__GNUC__) && defined(__i386__)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
+#elif defined(__SUNPRO_C) && defined(__i386)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
+#elif defined(__GNUC__) && defined(__x86_64__)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_64;
+#elif defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
+ mti->cycles.routine= MY_TIMER_ROUTINE_RDTSCLL;
+#elif defined(_WIN32) && defined(_M_IX86)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_WIN;
+#elif defined(_WIN64) && defined(_M_X64)
+ mti->cycles.routine= MY_TIMER_ROUTINE_RDTSC;
+#elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_IA64;
+#elif defined(__GNUC__) && defined(__ia64__)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_IA64;
+#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (defined(__64BIT__) || defined(_ARCH_PPC64))
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC64;
+#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC;
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64;
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32;
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_I386;
+#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64;
+#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64) && (__GNUC__>2)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC64;
+#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64) && (__GNUC__>2)
+ mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC32;
+#elif defined(__sgi) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
+ mti->cycles.routine= MY_TIMER_ROUTINE_SGI_CYCLE;
+#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
+ mti->cycles.routine= MY_TIMER_ROUTINE_GETHRTIME;
+#else
+ mti->cycles.routine= 0;
+#endif
+
+ if (!mti->cycles.routine || !my_timer_cycles())
+ {
+ mti->cycles.routine= 0;
+ mti->cycles.resolution= 0;
+ mti->cycles.frequency= 0;
+ mti->cycles.overhead= 0;
+ }
+
+ /* nanoseconds */
+ mti->nanoseconds.frequency= 1000000000; /* initial assumption */
+#if defined(HAVE_READ_REAL_TIME)
+ mti->nanoseconds.routine= MY_TIMER_ROUTINE_READ_REAL_TIME;
+#elif defined(HAVE_CLOCK_GETTIME)
+ mti->nanoseconds.routine= MY_TIMER_ROUTINE_CLOCK_GETTIME;
+#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
+ mti->nanoseconds.routine= MY_TIMER_ROUTINE_GETHRTIME;
+#elif defined(__NETWARE__)
+ mti->nanoseconds.routine= MY_TIMER_ROUTINE_NXGETTIME;
+#elif defined(__APPLE__) && defined(__MACH__)
+ mti->nanoseconds.routine= MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME;
+#else
+ mti->nanoseconds.routine= 0;
+#endif
+ if (!mti->nanoseconds.routine || !my_timer_nanoseconds())
+ {
+ mti->nanoseconds.routine= 0;
+ mti->nanoseconds.resolution= 0;
+ mti->nanoseconds.frequency= 0;
+ mti->nanoseconds.overhead= 0;
+ }
+
+ /* microseconds */
+ mti->microseconds.frequency= 1000000; /* initial assumption */
+#if defined(HAVE_GETTIMEOFDAY)
+ mti->microseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
+#elif defined(_WIN32)
+ {
+ LARGE_INTEGER li;
+ /* Windows: typical frequency = 3579545, actually 1/3 microsecond. */
+ if (!QueryPerformanceFrequency(&li))
+ mti->microseconds.routine= 0;
+ else
+ {
+ mti->microseconds.frequency= li.QuadPart;
+ mti->microseconds.routine= MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER;
+ }
+ }
+#elif defined(__NETWARE__)
+ mti->microseconds.routine= MY_TIMER_ROUTINE_NXGETTIME;
+#else
+ mti->microseconds.routine= 0;
+#endif
+ if (!mti->microseconds.routine || !my_timer_microseconds())
+ {
+ mti->microseconds.routine= 0;
+ mti->microseconds.resolution= 0;
+ mti->microseconds.frequency= 0;
+ mti->microseconds.overhead= 0;
+ }
+
+ /* milliseconds */
+ mti->milliseconds.frequency= 1000; /* initial assumption */
+#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
+ mti->milliseconds.routine= MY_TIMER_ROUTINE_FTIME;
+#elif defined(_WIN32)
+ mti->milliseconds.routine= MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME;
+#elif defined(__NETWARE__)
+ mti->milliseconds.routine= MY_TIMER_ROUTINE_NXGETTIME;
+#elif defined(HAVE_TIME)
+ mti->milliseconds.routine= MY_TIMER_ROUTINE_TIME;
+#else
+ mti->milliseconds.routine= 0;
+#endif
+ if (!mti->milliseconds.routine || !my_timer_milliseconds())
+ {
+ mti->milliseconds.routine= 0;
+ mti->milliseconds.resolution= 0;
+ mti->milliseconds.frequency= 0;
+ mti->milliseconds.overhead= 0;
+ }
+
+ /* ticks */
+ mti->ticks.frequency= 100; /* permanent assumption */
+#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
+ mti->ticks.routine= MY_TIMER_ROUTINE_TIMES;
+#elif defined(__NETWARE__)
+ mti->ticks.routine= MY_TIMER_ROUTINE_NXGETTIME;
+#elif defined(_WIN32)
+ mti->ticks.routine= MY_TIMER_ROUTINE_GETTICKCOUNT;
+#else
+ mti->ticks.routine= 0;
+#endif
+ if (!mti->ticks.routine || !my_timer_ticks())
+ {
+ mti->ticks.routine= 0;
+ mti->ticks.resolution= 0;
+ mti->ticks.frequency= 0;
+ mti->ticks.overhead= 0;
+ }
+
+ /*
+ Calculate overhead in terms of the timer that
+ gives the best resolution: cycles or nanoseconds.
+ I doubt it ever will be as bad as microseconds.
+ */
+ if (mti->cycles.routine)
+ best_timer= &my_timer_cycles;
+ else
+ {
+ if (mti->nanoseconds.routine)
+ {
+ best_timer= &my_timer_nanoseconds;
+ }
+ else
+ best_timer= &my_timer_microseconds;
+ }
+
+ /* best_timer_overhead = least of 20 calculations */
+ for (i= 0, best_timer_overhead= 1000000000; i < 20; ++i)
+ {
+ time1= best_timer();
+ time2= best_timer() - time1;
+ if (best_timer_overhead > time2)
+ best_timer_overhead= time2;
+ }
+ if (mti->cycles.routine)
+ my_timer_init_overhead(&mti->cycles.overhead,
+ best_timer,
+ &my_timer_cycles,
+ best_timer_overhead);
+ if (mti->nanoseconds.routine)
+ my_timer_init_overhead(&mti->nanoseconds.overhead,
+ best_timer,
+ &my_timer_nanoseconds,
+ best_timer_overhead);
+ if (mti->microseconds.routine)
+ my_timer_init_overhead(&mti->microseconds.overhead,
+ best_timer,
+ &my_timer_microseconds,
+ best_timer_overhead);
+ if (mti->milliseconds.routine)
+ my_timer_init_overhead(&mti->milliseconds.overhead,
+ best_timer,
+ &my_timer_milliseconds,
+ best_timer_overhead);
+ if (mti->ticks.routine)
+ my_timer_init_overhead(&mti->ticks.overhead,
+ best_timer,
+ &my_timer_ticks,
+ best_timer_overhead);
+
+/*
+ Calculate resolution for nanoseconds or microseconds
+ or milliseconds, by seeing if it's always divisible
+ by 1000, and by noticing how much jumping occurs.
+ For ticks, just assume the resolution is 1.
+*/
+ if (mti->cycles.routine)
+ mti->cycles.resolution= 1;
+ if (mti->nanoseconds.routine)
+ mti->nanoseconds.resolution=
+ my_timer_init_resolution(&my_timer_nanoseconds, 20000);
+ if (mti->microseconds.routine)
+ mti->microseconds.resolution=
+ my_timer_init_resolution(&my_timer_microseconds, 20);
+ if (mti->milliseconds.routine)
+ {
+ if (mti->milliseconds.routine == MY_TIMER_ROUTINE_TIME)
+ mti->milliseconds.resolution= 1000;
+ else
+ mti->milliseconds.resolution=
+ my_timer_init_resolution(&my_timer_milliseconds, 0);
+ }
+ if (mti->ticks.routine)
+ mti->ticks.resolution= 1;
+
+/*
+ Calculate cycles frequency,
+ if we have both a cycles routine and a microseconds routine.
+ In tests, this usually results in a figure within 2% of
+ what "cat /proc/cpuinfo" says.
+ If the microseconds routine is QueryPerformanceCounter
+ (i.e. it's Windows), and the microseconds frequency is >
+ 500,000,000 (i.e. it's Windows Server so it uses RDTSC)
+ and the microseconds resolution is > 100 (i.e. dreadful),
+ then calculate cycles frequency = microseconds frequency.
+*/
+ if (mti->cycles.routine
+ && mti->microseconds.routine)
+ {
+ if (mti->microseconds.routine ==
+ MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER
+ && mti->microseconds.frequency > 500000000
+ && mti->microseconds.resolution > 100)
+ mti->cycles.frequency= mti->microseconds.frequency;
+ else
+ {
+ ulonglong time1, time2;
+ time1= my_timer_init_frequency(mti);
+ /* Repeat once in case there was an interruption. */
+ time2= my_timer_init_frequency(mti);
+ if (time1 < time2) mti->cycles.frequency= time1;
+ else mti->cycles.frequency= time2;
+ }
+ }
+
+/*
+ Calculate milliseconds frequency =
+ (cycles-frequency/#-of-cycles) * #-of-milliseconds,
+ if we have both a milliseconds routine and a cycles
+ routine.
+ This will be inaccurate if milliseconds resolution > 1.
+ This is probably only useful when testing new platforms.
+*/
+ if (mti->milliseconds.routine
+ && mti->milliseconds.resolution < 1000
+ && mti->microseconds.routine
+ && mti->cycles.routine)
+ {
+ int i;
+ ulonglong time1, time2, time3, time4;
+ time1= my_timer_cycles();
+ time2= my_timer_milliseconds();
+ time3= time2; /* Avoids a Microsoft/IBM compiler warning */
+ for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
+ {
+ time3= my_timer_milliseconds();
+ if (time3 - time2 > 10) break;
+ }
+ time4= my_timer_cycles();
+ mti->milliseconds.frequency=
+ (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
+ }
+
+/*
+ Calculate ticks.frequency =
+ (cycles-frequency/#-of-cycles * #-of-ticks,
+ if we have both a ticks routine and a cycles
+ routine,
+ This is probably only useful when testing new platforms.
+*/
+ if (mti->ticks.routine
+ && mti->microseconds.routine
+ && mti->cycles.routine)
+ {
+ int i;
+ ulonglong time1, time2, time3, time4;
+ time1= my_timer_cycles();
+ time2= my_timer_ticks();
+ time3= time2; /* Avoids a Microsoft/IBM compiler warning */
+ for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
+ {
+ time3= my_timer_ticks();
+ if (time3 - time2 > 10) break;
+ }
+ time4= my_timer_cycles();
+ mti->ticks.frequency=
+ (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
+ }
+}
+
+/*
+ Additional Comments
+ -------------------
+
+ This is for timing, i.e. finding out how long a piece of code
+ takes. If you want time of day matching a wall clock, the
+ my_timer_xxx functions won't help you.
+
+ The best timer is the one with highest frequency, lowest
+ overhead, and resolution=1. The my_timer_info() routine will tell
+ you at runtime which timer that is. Usually it will be
+ my_timer_cycles() but be aware that, although it's best,
+ it has possible flaws and dangers. Depending on platform:
+ - The frequency might change. We don't test for this. It
+ happens on laptops for power saving, and on blade servers
+ for avoiding overheating.
+ - The overhead that my_timer_init() returns is the minimum.
+ In fact it could be slightly greater because of caching or
+ because you call the routine by address, as recommended.
+ It could be hugely greater if there's an interrupt.
+ - The x86 cycle counter, RDTSC doesn't "serialize". That is,
+ if there is out-of-order execution, rdtsc might be processed
+ after an instruction that logically follows it.
+ (We could force serialization, but that would be slower.)
+ - It is possible to set a flag which renders RDTSC
+ inoperative. Somebody responsible for the kernel
+ of the operating system would have to make this
+ decision. For the platforms we've tested with, there's
+ no such problem.
+ - With a multi-processor arrangement, it's possible
+ to get the cycle count from one processor in
+ thread X, and the cycle count from another processor
+ in thread Y. They may not always be in synch.
+ - You can't depend on a cycle counter being available for
+ all platforms. On Alphas, the
+ cycle counter is only 32-bit, so it would overflow quickly,
+ so we don't bother with it. On platforms that we haven't
+ tested, there might be some if/endif combination that we
+ didn't expect, or some assembler routine that we didn't
+ supply.
+
+ The recommended way to use the timer routines is:
+ 1. Somewhere near the beginning of the program, call
+ my_timer_init(). This should only be necessary once,
+ although you can call it again if you think that the
+ frequency has changed.
+ 2. Determine the best timer based on frequency, resolution,
+ overhead -- all things that my_timer_init() returns.
+ Preserve the address of the timer and the my_timer_into
+ results in an easily-accessible place.
+ 3. Instrument the code section that you're monitoring, thus:
+ time1= my_timer_xxx();
+ Instrumented code;
+ time2= my_timer_xxx();
+ elapsed_time= (time2 - time1) - overhead;
+ If the timer is always on, then overhead is always there,
+ so don't subtract it.
+ 4. Save the elapsed time, or add it to a totaller.
+ 5. When all timing processes are complete, transfer the
+ saved / totalled elapsed time to permanent storage.
+ Optionally you can convert cycles to microseconds at
+ this point. (Don't do so every time you calculate
+ elapsed_time! That would waste time and lose precision!)
+ For converting cycles to microseconds, use the frequency
+ that my_timer_init() returns. You'll also need to convert
+ if the my_timer_microseconds() function is the Windows
+ function QueryPerformanceCounter(), since that's sometimes
+ a counter with precision slightly better than microseconds.
+
+ Since we recommend calls by function pointer, we supply
+ no inline functions.
+
+ Some comments on the many candidate routines for timing ...
+
+ clock() -- We don't use because it would overflow frequently.
+
+ clock_gettime() -- Often we don't use this even when it exists.
+ In configure.in, we use AC_CHECK_FUNCS(clock_gettime). Not
+ AC_CHECK_LIB(rc,clock_gettime)
+ AC_CHECK_FUNCS(clock_gettime)
+ If we had the above lines in configure.in, we'd have to use
+ /usr/lib/librt.so or /usr/lib64/librt.so when linking, and
+ the size of librt.so is 40KB. In tests, clock_gettime often
+ had resolution = 1000.
+
+ ftime() -- A "man ftime" says: "This function is obsolete.
+ Don't use it." On every platform that we tested, if ftime()
+ was available, then so was gettimeofday(), and gettimeofday()
+ overhead was always at least as good as ftime() overhead.
+
+ gettimeofday() -- available on most platforms, though not
+ on Windows. There is a hardware timer (sometimes a Programmable
+ Interrupt Timer or "PIT") (sometimes a "HPET") used for
+ interrupt generation. When it interrupts (a "tick" or "jiffy",
+ typically 1 centisecond) it sets xtime. For gettimeofday, a
+ Linux kernel routine usually gets xtime and then gets rdtsc
+ to get elapsed nanoseconds since the last tick. On Red Hat
+ Enterprise Linux 3, there was once a bug which caused the
+ resolution to be 1000, i.e. one centisecond. We never check
+ for time-zone change.
+
+ getnstimeofday() -- something to watch for in future Linux
+
+ do_gettimeofday() -- exists on Linux but not for "userland"
+
+ get_cycles() -- a multi-platform function, worth watching
+ in future Linux versions. But we found platform-specific
+ functions which were better documented in operating-system
+ manuals. And get_cycles() can fail or return a useless
+ 32-bit number. It might be available on some platforms,
+ such as arm, which we didn't test. Using
+ "include <linux/timex.h>" or "include <asm/timex.h>"
+ can lead to autoconf or compile errors, depending on system.
+
+ rdtsc, __rdtsc, rdtscll: available for x86 with Linux BSD,
+ Solaris, Windows. See "possible flaws and dangers" comments.
+
+ times(): what we use for ticks. Should just read the last
+ (xtime) tick count, therefore should be fast, but usually
+ isn't.
+
+ GetTickCount(): we use this for my_timer_ticks() on
+ Windows. Actually it really is a tick counter, so resolution
+ >= 10 milliseconds unless you have a very old Windows version.
+ With Windows 95 or 98 or ME, timeGetTime() has better resolution than
+ GetTickCount (1ms rather than 55ms). But with Windows NT or XP or 2000,
+ they're both getting from a variable in the Process Environment Block
+ (PEB), and the variable is set by the programmable interrupt timer, so
+ the resolution is the same (usually 10-15 milliseconds). Also timeGetTime
+ is slower on old machines:
+ http://www.doumo.jp/aon-java/jsp/postgretips/tips.jsp?tips=74.
+ Also timeGetTime requires linking winmm.lib,
+ Therefore we use GetTickCount.
+ It will overflow every 49 days because the return is 32-bit.
+ There is also a GetTickCount64 but it requires Vista or Windows Server 2008.
+ (As for GetSystemTimeAsFileTime, its precision is spurious, it
+ just reads the tick variable like the other functions do.
+ However, we don't expect it to overflow every 49 days, so we
+ will prefer it for my_timer_milliseconds().)
+
+ QueryPerformanceCounter() we use this for my_timer_microseconds()
+ on Windows. 1-PIT-tick (often 1/3-microsecond). Usually reads
+ the PIT so it's slow. On some Windows variants, uses RDTSC.
+
+ GetLocalTime() this is available on Windows but we don't use it.
+
+ getclock(): documented for Alpha, but not found during tests.
+
+ mach_absolute_time() and UpTime() are recommended for Apple.
+ Inititally they weren't tried, because asm_ppc seems to do the job.
+ But now we use mach_absolute_time for nanoseconds.
+
+ Any clock-based timer can be affected by NPT (ntpd program),
+ which means:
+ - full-second correction can occur for leap second
+ - tiny corrections can occcur approimately every 11 minutes
+ (but I think they only affect the RTC which isn't the PIT).
+
+ We define "precision" as "frequency" and "high precision" is
+ "frequency better than 1 microsecond". We define "resolution"
+ as a synonym for "granularity". We define "accuracy" as
+ "closeness to the truth" as established by some authoritative
+ clock, but we can't measure accuracy.
+
+ Do not expect any of our timers to be monotonic; we
+ won't guarantee that they return constantly-increasing
+ unique numbers.
+
+ We tested with AIX, Solaris (x86 + Sparc), Linux (x86 +
+ Itanium), Windows, 64-bit Windows, QNX, FreeBSD, HPUX,
+ Irix, Mac. We didn't test with NetWare or SCO.
+
+*/
+
diff --git a/mysys/my_timer_cycles.il b/mysys/my_timer_cycles.il
new file mode 100644
index 00000000000..2f3f776530b
--- /dev/null
+++ b/mysys/my_timer_cycles.il
@@ -0,0 +1,38 @@
+/* Copyright (C) 2008 Sun Microsystems, Inc
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Sun Studio SPARC inline templates for cycle timer */
+/* Sun Studio i386 and x86_64 inline templates for cycle timer */
+/* I didn't say ".volatile" or ".nonvolatile". */
+
+.inline my_timer_cycles_il_sparc64,0
+rd %tick,%o0
+.end
+
+.inline my_timer_cycles_il_sparc32,0
+rd %tick,%o2
+srlx %o2,32,%o0
+sra %o2,0,%o1
+.end
+
+.inline my_timer_cycles_il_i386,0
+rdtsc
+.end
+
+.inline my_timer_cycles_il_x86_64,0
+rdtsc
+shlq $32,%rdx
+orq %rdx,%rax
+.end
diff --git a/unittest/mysys/CMakeLists.txt b/unittest/mysys/CMakeLists.txt
index 6377ec1ab74..3bf23df6066 100644
--- a/unittest/mysys/CMakeLists.txt
+++ b/unittest/mysys/CMakeLists.txt
@@ -27,6 +27,6 @@ MACRO (MY_ADD_TEST name)
ENDMACRO()
-FOREACH(testname bitmap base64 my_vsnprintf my_atomic lf)
+FOREACH(testname bitmap base64 my_vsnprintf my_atomic my_rdtsc lf)
MY_ADD_TEST(${testname})
ENDFOREACH()
diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am
index 47cc9841994..25b30e331b0 100644
--- a/unittest/mysys/Makefile.am
+++ b/unittest/mysys/Makefile.am
@@ -23,9 +23,7 @@ LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/dbug/libdbug.a \
$(top_builddir)/strings/libmystrings.a
-noinst_PROGRAMS = bitmap-t base64-t lf-t my_vsnprintf-t
-EXTRA_DIST = CMakeLists.txt
-
+noinst_PROGRAMS = bitmap-t base64-t lf-t my_rdtsc-t my_vsnprintf-t
if NEED_THREAD
# my_atomic-t is used to check thread functions, so it is safe to
@@ -34,5 +32,4 @@ if NEED_THREAD
noinst_PROGRAMS += my_atomic-t
endif
-# Don't update the files from bitkeeper
-%::SCCS/s.%
+EXTRA_DIST = CMakeLists.txt
diff --git a/unittest/mysys/my_rdtsc-t.c b/unittest/mysys/my_rdtsc-t.c
new file mode 100644
index 00000000000..286ff29fbfc
--- /dev/null
+++ b/unittest/mysys/my_rdtsc-t.c
@@ -0,0 +1,229 @@
+/* Copyright (C) 2008-2009 Sun Microsystems, Inc
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ rdtsc3 -- multi-platform timer code
+ pgulutzan@mysql.com, 2005-08-29
+ modified 2008-11-02
+
+ When you run rdtsc3, it will print the contents of
+ "my_timer_info". The display indicates
+ what timer routine is best for a given platform.
+
+ For example, this is the display on production.mysql.com,
+ a 2.8GHz Xeon with Linux 2.6.17, gcc 3.3.3:
+
+ cycles nanoseconds microseconds milliseconds ticks
+------------- ------------- ------------- ------------- -------------
+ 1 11 13 18 17
+ 2815019607 1000000000 1000000 1049 102
+ 1 1000 1 1 1
+ 88 4116 3888 4092 2044
+
+ The first line shows routines, e.g. 1 = MY_TIMER_ROUTINE_ASM_X86.
+ The second line shows frequencies, e.g. 2815019607 is nearly 2.8GHz.
+ The third line shows resolutions, e.g. 1000 = very poor resolution.
+ The fourth line shows overheads, e.g. ticks takes 2044 cycles.
+*/
+
+#include "my_global.h"
+#include "my_rdtsc.h"
+#include "tap.h"
+
+#define LOOP_COUNT 100
+
+MY_TIMER_INFO myt;
+
+void test_init()
+{
+ my_timer_init(&myt);
+
+ diag("----- Routine ---------------");
+ diag("myt.cycles.routine : %13llu", myt.cycles.routine);
+ diag("myt.nanoseconds.routine : %13llu", myt.nanoseconds.routine);
+ diag("myt.microseconds.routine : %13llu", myt.microseconds.routine);
+ diag("myt.milliseconds.routine : %13llu", myt.milliseconds.routine);
+ diag("myt.ticks.routine : %13llu", myt.ticks.routine);
+
+ diag("----- Frequency -------------");
+ diag("myt.cycles.frequency : %13llu", myt.cycles.frequency);
+ diag("myt.nanoseconds.frequency : %13llu", myt.nanoseconds.frequency);
+ diag("myt.microseconds.frequency : %13llu", myt.microseconds.frequency);
+ diag("myt.milliseconds.frequency : %13llu", myt.milliseconds.frequency);
+ diag("myt.ticks.frequency : %13llu", myt.ticks.frequency);
+
+ diag("----- Resolution ------------");
+ diag("myt.cycles.resolution : %13llu", myt.cycles.resolution);
+ diag("myt.nanoseconds.resolution : %13llu", myt.nanoseconds.resolution);
+ diag("myt.microseconds.resolution : %13llu", myt.microseconds.resolution);
+ diag("myt.milliseconds.resolution : %13llu", myt.milliseconds.resolution);
+ diag("myt.ticks.resolution : %13llu", myt.ticks.resolution);
+
+ diag("----- Overhead --------------");
+ diag("myt.cycles.overhead : %13llu", myt.cycles.overhead);
+ diag("myt.nanoseconds.overhead : %13llu", myt.nanoseconds.overhead);
+ diag("myt.microseconds.overhead : %13llu", myt.microseconds.overhead);
+ diag("myt.milliseconds.overhead : %13llu", myt.milliseconds.overhead);
+ diag("myt.ticks.overhead : %13llu", myt.ticks.overhead);
+
+ ok(1, "my_timer_init() did not crash");
+}
+
+void test_cycle()
+{
+ ulonglong t1= my_timer_cycles();
+ ulonglong t2;
+ int i;
+ int backward= 0;
+ int nonzero= 0;
+
+ for (i=0 ; i < LOOP_COUNT ; i++)
+ {
+ t2= my_timer_cycles();
+ if (t1 >= t2)
+ backward++;
+ if (t2 != 0)
+ nonzero++;
+ t1= t2;
+ }
+
+ /* Expect at most 1 backward, the cycle value can overflow */
+ ok((backward <= 1), "The cycle timer is strictly increasing");
+
+ if (myt.cycles.routine != 0)
+ ok((nonzero != 0), "The cycle timer is implemented");
+ else
+ ok((nonzero == 0), "The cycle timer is not implemented and returns 0");
+}
+
+void test_nanosecond()
+{
+ ulonglong t1= my_timer_nanoseconds();
+ ulonglong t2;
+ int i;
+ int backward= 0;
+ int nonzero= 0;
+
+ for (i=0 ; i < LOOP_COUNT ; i++)
+ {
+ t2= my_timer_nanoseconds();
+ if (t1 > t2)
+ backward++;
+ if (t2 != 0)
+ nonzero++;
+ t1= t2;
+ }
+
+ ok((backward == 0), "The nanosecond timer is increasing");
+
+ if (myt.nanoseconds.routine != 0)
+ ok((nonzero != 0), "The nanosecond timer is implemented");
+ else
+ ok((nonzero == 0), "The nanosecond timer is not implemented and returns 0");
+}
+
+void test_microsecond()
+{
+ ulonglong t1= my_timer_microseconds();
+ ulonglong t2;
+ int i;
+ int backward= 0;
+ int nonzero= 0;
+
+ for (i=0 ; i < LOOP_COUNT ; i++)
+ {
+ t2= my_timer_microseconds();
+ if (t1 > t2)
+ backward++;
+ if (t2 != 0)
+ nonzero++;
+ t1= t2;
+ }
+
+ ok((backward == 0), "The microsecond timer is increasing");
+
+ if (myt.microseconds.routine != 0)
+ ok((nonzero != 0), "The microsecond timer is implemented");
+ else
+ ok((nonzero == 0), "The microsecond timer is not implemented and returns 0");
+}
+
+void test_millisecond()
+{
+ ulonglong t1= my_timer_milliseconds();
+ ulonglong t2;
+ int i;
+ int backward= 0;
+ int nonzero= 0;
+
+ for (i=0 ; i < LOOP_COUNT ; i++)
+ {
+ t2= my_timer_milliseconds();
+ if (t1 > t2)
+ backward++;
+ if (t2 != 0)
+ nonzero++;
+ t1= t2;
+ }
+
+ ok((backward == 0), "The millisecond timer is increasing");
+
+ if (myt.milliseconds.routine != 0)
+ ok((nonzero != 0), "The millisecond timer is implemented");
+ else
+ ok((nonzero == 0), "The millisecond timer is not implemented and returns 0");
+}
+
+void test_tick()
+{
+ ulonglong t1= my_timer_ticks();
+ ulonglong t2;
+ int i;
+ int backward= 0;
+ int nonzero= 0;
+
+ for (i=0 ; i < LOOP_COUNT ; i++)
+ {
+ t2= my_timer_ticks();
+ if (t1 > t2)
+ backward++;
+ if (t2 != 0)
+ nonzero++;
+ t1= t2;
+ }
+
+ ok((backward == 0), "The tick timer is increasing");
+
+ if (myt.ticks.routine != 0)
+ ok((nonzero != 0), "The tick timer is implemented");
+ else
+ ok((nonzero == 0), "The tick timer is not implemented and returns 0");
+}
+
+int main(int argc __attribute__((unused)),
+ char ** argv __attribute__((unused)))
+{
+ plan(11);
+
+ test_init();
+ test_cycle();
+ test_nanosecond();
+ test_microsecond();
+ test_millisecond();
+ test_tick();
+
+ return 0;
+}
+