diff options
-rw-r--r-- | configure.ac | 11 | ||||
-rw-r--r-- | tests/bench-slope.c | 57 |
2 files changed, 58 insertions, 10 deletions
diff --git a/configure.ac b/configure.ac
index 9a5359c2..96a18d19 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1117,7 +1117,11 @@ if test "$gcry_cv_have_asm" = "no" ; then
        [gcry_cv_have_asm_volatile_memory],
        [gcry_cv_have_asm_volatile_memory=no
         AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-          [[void a(void) { __asm__ volatile("":::"memory"); }]])],
+          [[void a(int x)
+            {
+              __asm__ volatile("":::"memory");
+              __asm__ volatile("":"+r"(x)::"memory");
+            }]])],
           [gcry_cv_have_asm_volatile_memory=yes])])
   fi
 else
@@ -1125,7 +1129,10 @@ else
        [gcry_cv_have_asm_volatile_memory],
        [gcry_cv_have_asm_volatile_memory=no
         AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-          [[void a(void) { asm volatile("":::"memory"); }]])],
+          [[void a(int x)
+            {
+              asm volatile("":::"memory");
+              asm volatile("":"+r"(x)::"memory"); }]])],
           [gcry_cv_have_asm_volatile_memory=yes])])
   fi
   if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
diff --git a/tests/bench-slope.c b/tests/bench-slope.c
index 63f8f7ae..cfb3dd66 100644
--- a/tests/bench-slope.c
+++ b/tests/bench-slope.c
@@ -509,18 +509,59 @@ auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen)
    * function will give cycles/iteration result 1024.0 on high-end CPUs.
    * With turbo, result will be less and can be used detect turbo-clock. */
 
-  do
-    {
 #ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
-      /* Use memory barrier to prevent compiler from optimizing this loop
-       * away. */
-
-      asm volatile ("":::"memory");
+  /* Auto-ghz operation takes two CPU cycles to perform. Memory barriers
+   * are used to prevent compiler from optimizing this loop away. */
+  #define AUTO_GHZ_OPERATION \
+    asm volatile ("":"+r"(buflen)::"memory"); \
+    buflen ^= 1; \
+    asm volatile ("":"+r"(buflen)::"memory"); \
+    buflen -= 2
 #else
-      /* TODO: Needs alternative way. */
+  /* TODO: Needs alternative way of preventing compiler optimizations.
+   * Mix of XOR and subtraction appears to do the trick for now. */
+  #define AUTO_GHZ_OPERATION \
+    buflen ^= 1; \
+    buflen -= 2
 #endif
+
+#define AUTO_GHZ_OPERATION_2 \
+  AUTO_GHZ_OPERATION; \
+  AUTO_GHZ_OPERATION
+
+#define AUTO_GHZ_OPERATION_4 \
+  AUTO_GHZ_OPERATION_2; \
+  AUTO_GHZ_OPERATION_2
+
+#define AUTO_GHZ_OPERATION_8 \
+  AUTO_GHZ_OPERATION_4; \
+  AUTO_GHZ_OPERATION_4
+
+#define AUTO_GHZ_OPERATION_16 \
+  AUTO_GHZ_OPERATION_8; \
+  AUTO_GHZ_OPERATION_8
+
+#define AUTO_GHZ_OPERATION_32 \
+  AUTO_GHZ_OPERATION_16; \
+  AUTO_GHZ_OPERATION_16
+
+#define AUTO_GHZ_OPERATION_64 \
+  AUTO_GHZ_OPERATION_32; \
+  AUTO_GHZ_OPERATION_32
+
+#define AUTO_GHZ_OPERATION_128 \
+  AUTO_GHZ_OPERATION_64; \
+  AUTO_GHZ_OPERATION_64
+
+  do
+    {
+      /* 1024 auto-ghz operations per loop, total 2048 instructions. */
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
     }
-  while (--buflen);
+  while (buflen);
 }
 
 static struct bench_ops auto_ghz_detect_ops = {