summaryrefslogtreecommitdiff
path: root/mbench/timp.h
blob: 2450241b17d14492fee8ad16c9cce7e6eeb2be42 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*
Copyright 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
Contributed by Patrick Pelissier, INRIA.

This file is part of the MPFR Library.

The MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPFR Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1301, USA. */

#ifndef __TIMP__H__
#define __TIMP__H__

/* Usage:
 *  Before measuring, call TIMP_OVERHEAD (); to calibrate the cost of the
 *  measurement itself.
 *  Then write: unsigned long long t = TIMP_MEASURE (f(x));
 *  to obtain the number of cycles taken by the call to f(x).
 */

/* Version number of this header, written as 1*100 + 0*10 + 0 (apparently
   major/minor/patch digits, i.e. 100 for 1.0.0).  The expansion is
   parenthesised so that it behaves as a single value inside a larger
   expression such as TIMP_VERSION * 2 or 1000 / TIMP_VERSION.  */
#define TIMP_VERSION (1*100+0*10+0)

/* The macros in this file use GNU C extensions (__asm__ statements and
   ({ ... }) statement expressions), so refuse to build with a compiler
   that does not define __GNUC__.  */
#if !defined (__GNUC__)
# error  CC != GCC 
#endif

/* High accuracy timing */
#if defined (__i386__) || defined(__amd64__)

/* timp_rdtsc_before (time): store the current time-stamp counter into the
 * 64-bit object `time' at the start of a measured region (x86 / x86-64).
 *
 * The address of `time' is passed to the asm through the "S" constraint
 * (the si register).  After rdtsc, eax is stored at offset 0 and edx at
 * offset 4 of that address, i.e. the low and high 32-bit halves of the
 * counter.
 *
 * The rdtsc is surrounded by "xorl %eax,%eax; cpuid" pairs; cpuid is used
 * here as a serialising instruction (see the Intel SDM), presumably so that
 * neither earlier nor later instructions overlap the counter read.  The
 * cpuid that follows the rdtsc is therefore part of the measured interval.
 *
 * cpuid overwrites eax, ebx, ecx and edx, hence the clobber list; the
 * "memory" clobber tells the compiler that memory is written (the store
 * through the pointer).  The leading ".align 64" directive pads the
 * instruction stream to a 64-byte boundary before the sequence.
 */
#define timp_rdtsc_before(time)           \
        __asm__ __volatile__(             \
                ".align 64\n\t"           \
                "xorl %%eax,%%eax\n\t"    \
                "cpuid\n\t"               \
                "rdtsc\n\t"               \
                "movl %%eax,(%0)\n\t"     \
                "movl %%edx,4(%0)\n\t"    \
                "xorl %%eax,%%eax\n\t"    \
                "cpuid\n\t"               \
                : /* no output */         \
                : "S"(&time)              \
                : "eax", "ebx", "ecx", "edx", "memory")

/* timp_rdtsc_after (time): store the current time-stamp counter into the
 * 64-bit object `time' at the end of a measured region (x86 / x86-64).
 *
 * The instruction sequence is: "xorl %eax,%eax; cpuid", rdtsc, two 32-bit
 * stores (eax to offset 0, edx to offset 4 of the address passed in the si
 * register via the "S" constraint), then "xorl %eax,%eax; cpuid" again.
 * cpuid is used here as a serialising instruction (see the Intel SDM); the
 * cpuid that precedes the rdtsc is therefore part of the measured interval.
 *
 * No ".align" directive is emitted in this macro.  The clobber list names
 * the four registers written by cpuid/rdtsc, and "memory" covers the store
 * through the pointer.
 */
#define timp_rdtsc_after(time)            \
        __asm__ __volatile__(             \
                "xorl %%eax,%%eax\n\t"    \
                "cpuid\n\t"               \
                "rdtsc\n\t"               \
                "movl %%eax,(%0)\n\t"     \
                "movl %%edx,4(%0)\n\t"    \
                "xorl %%eax,%%eax\n\t"    \
                "cpuid\n\t"               \
                : /* no output */         \
                : "S"(&time)              \
                : "eax", "ebx", "ecx", "edx", "memory")

#elif defined (__ia64)

/* IA-64: timp_rdtsc () evaluates to the value read from the ar.itc
   register (the Itanium interval time counter) as an unsigned long long.
   The "memory" clobber keeps the compiler from moving memory accesses
   across the read.  The before/after macros simply assign that value to
   their argument; both use the same sequence on this CPU.  */
#define timp_rdtsc()                                                  \
  ({                                                                  \
    unsigned long long int timp_itc_;                                 \
    __asm__ __volatile__ ("mov %0=ar.itc"                             \
                          : "=r" (timp_itc_)                          \
                          : /* no input */                            \
                          : "memory");                                \
    timp_itc_;                                                        \
  })
#define timp_rdtsc_after(time)  (time = timp_rdtsc ())
#define timp_rdtsc_before(time) (time = timp_rdtsc ())

#elif defined (__alpha)

/* Alpha: timp_rdtsc () evaluates to the value produced by the "rpcc"
   instruction as an unsigned long long.  The before/after macros assign
   that value to their argument.

   They must expand to timp_rdtsc (): the previous spelling, tpp_rdtsc (),
   names nothing defined in this file, so any use of TIMP_MEASURE on this
   CPU failed to build.  */
#define timp_rdtsc()                                  \
({ unsigned long long int x;                          \
   __asm__ __volatile__ ("rpcc %0\n\t" : "=r" (x));   \
   x; })
#define timp_rdtsc_before(time) (time = timp_rdtsc())
#define timp_rdtsc_after(time)  (time = timp_rdtsc())

#else
# error Unsupported CPU
#endif

/* We take several measurements and keep the minimum, to avoid counting
 * cycles spent in hardware interrupts.
 * The CPU cache gets filled as a side effect: the code is run several
 * times in a loop and only the minimum is kept.
 * The cycle-count variables are declared "volatile" to prevent the
 * compiler from optimising the measurement away where it otherwise could
 * (this is needed to compute the overhead properly).
 * The overhead is computed separately, by a call to:
 *   TIMP_OVERHEAD ();
 * This relies heavily on the preprocessor: the measurement is a macro so
 * that any code fragment can be timed.
 */
/* Number of cycles that TIMP_MEASURE subtracts from every result.  It is
   zero (static storage) until TIMP_OVERHEAD () assigns a measured value.
   Being `static', each translation unit that includes this header gets
   its own copy.  */
static unsigned long long timp_overhead;

/* Upper bound, in counter ticks since TIMP_MEASURE started, after which
   the measurement loop stops early.  */
#define TIMP_MAX_WAIT_FOR_MEASURE  10000000ULL
/* Maximum number of times TIMP_MEASURE runs the measured code.  */
#define TIMP_NUM_TRY               4327

/* TIMP_MEASURE (CODE): evaluate to the minimum number of counter ticks
 * (unsigned long long) taken by one execution of CODE, minus
 * timp_overhead.
 *
 * CODE is executed between timp_rdtsc_before and timp_rdtsc_after at most
 * TIMP_NUM_TRY times; the loop also stops as soon as more than
 * TIMP_MAX_WAIT_FOR_MEASURE ticks have elapsed since the macro started.
 * CODE is always executed at least once.
 *
 * The two time stamps are volatile so that the compiler cannot optimise
 * the measurement away, even when CODE is empty.
 *
 * If the minimum is not larger than timp_overhead the result is 0; an
 * unsigned subtraction would otherwise wrap around to a huge value.
 *
 * All locals carry a timp_ prefix and a trailing underscore so that they
 * do not capture identifiers (for instance a caller's own `num_cycle' or
 * `_i') that appear inside CODE.
 */
#define TIMP_MEASURE(CODE)                                            \
  ({                                                                  \
  volatile unsigned long long int timp_t0_, timp_t1_;                 \
  unsigned long long int timp_min_, timp_start_;                      \
  int timp_try_;                                                      \
  timp_rdtsc_before (timp_start_);                                    \
  timp_min_ = ~0ULL;                                                  \
  for (timp_try_ = 0 ; timp_try_ < TIMP_NUM_TRY ; timp_try_++) {      \
    timp_rdtsc_before (timp_t0_);                                     \
    CODE;                                                             \
    timp_rdtsc_after (timp_t1_);                                      \
    timp_t0_ = timp_t1_ - timp_t0_;                                   \
    if (timp_t0_ < timp_min_)                                         \
      timp_min_ = timp_t0_;                                           \
    if (timp_t1_ - timp_start_ > TIMP_MAX_WAIT_FOR_MEASURE)           \
      break;                                                          \
  }                                                                   \
  timp_min_ > timp_overhead ? timp_min_ - timp_overhead : 0ULL; })

/* TIMP_OVERHEAD (): calibrate the measurement.  timp_overhead is first
   reset to zero so that nothing is subtracted, then set to the result of
   measuring an empty statement.  The macro evaluates to the new value of
   timp_overhead.  */
#define TIMP_OVERHEAD()                                               \
  ({                                                                  \
    timp_overhead = 0;                                                \
    timp_overhead = TIMP_MEASURE ((void) 0);                          \
  })

#endif /* __TIMP__H__ */