diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2020-07-23 18:21:17 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2020-07-23 22:49:26 +0300 |
commit | f1c3db3bf40e07cfd1a6a92209865ee7a98129ca (patch) | |
tree | 2c34f257c6165549a1fed3c53c2eb38106b04734 | |
parent | 886120f33bd3f10e6e6a09920eca1f9ed81044e7 (diff) | |
download | libgcrypt-f1c3db3bf40e07cfd1a6a92209865ee7a98129ca.tar.gz |
tests/bench-slope: improve CPU frequency auto-detection
* configure.ac (gcry_cv_have_asm_volatile_memory): Also check whether
an assembly memory barrier with an input/output register operand is supported.
* tests/bench-slope.c (auto_ghz_bench): Change to use a base operation
that takes two CPU cycles and unroll the loop by 1024 operations.
--
CPU frequency is now correctly detected on the AWS Graviton CPU (2.3 GHz).
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | configure.ac | 11 | ||||
-rw-r--r-- | tests/bench-slope.c | 57 |
2 files changed, 58 insertions, 10 deletions
diff --git a/configure.ac b/configure.ac index 9a5359c2..96a18d19 100644 --- a/configure.ac +++ b/configure.ac @@ -1117,7 +1117,11 @@ if test "$gcry_cv_have_asm" = "no" ; then [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( - [[void a(void) { __asm__ volatile("":::"memory"); }]])], + [[void a(int x) + { + __asm__ volatile("":::"memory"); + __asm__ volatile("":"+r"(x)::"memory"); + }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi else @@ -1125,7 +1129,10 @@ else [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( - [[void a(void) { asm volatile("":::"memory"); }]])], + [[void a(int x) + { + asm volatile("":::"memory"); + asm volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then diff --git a/tests/bench-slope.c b/tests/bench-slope.c index 63f8f7ae..cfb3dd66 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -509,18 +509,59 @@ auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) * function will give cycles/iteration result 1024.0 on high-end CPUs. * With turbo, result will be less and can be used detect turbo-clock. */ - do - { #ifdef HAVE_GCC_ASM_VOLATILE_MEMORY - /* Use memory barrier to prevent compiler from optimizing this loop - * away. */ - - asm volatile ("":::"memory"); + /* Auto-ghz operation takes two CPU cycles to perform. Memory barriers + * are used to prevent compiler from optimizing this loop away. */ + #define AUTO_GHZ_OPERATION \ + asm volatile ("":"+r"(buflen)::"memory"); \ + buflen ^= 1; \ + asm volatile ("":"+r"(buflen)::"memory"); \ + buflen -= 2 #else - /* TODO: Needs alternative way. */ + /* TODO: Needs alternative way of preventing compiler optimizations. + * Mix of XOR and subtraction appears to do the trick for now. 
*/ + #define AUTO_GHZ_OPERATION \ + buflen ^= 1; \ + buflen -= 2 #endif + +#define AUTO_GHZ_OPERATION_2 \ + AUTO_GHZ_OPERATION; \ + AUTO_GHZ_OPERATION + +#define AUTO_GHZ_OPERATION_4 \ + AUTO_GHZ_OPERATION_2; \ + AUTO_GHZ_OPERATION_2 + +#define AUTO_GHZ_OPERATION_8 \ + AUTO_GHZ_OPERATION_4; \ + AUTO_GHZ_OPERATION_4 + +#define AUTO_GHZ_OPERATION_16 \ + AUTO_GHZ_OPERATION_8; \ + AUTO_GHZ_OPERATION_8 + +#define AUTO_GHZ_OPERATION_32 \ + AUTO_GHZ_OPERATION_16; \ + AUTO_GHZ_OPERATION_16 + +#define AUTO_GHZ_OPERATION_64 \ + AUTO_GHZ_OPERATION_32; \ + AUTO_GHZ_OPERATION_32 + +#define AUTO_GHZ_OPERATION_128 \ + AUTO_GHZ_OPERATION_64; \ + AUTO_GHZ_OPERATION_64 + + do + { + /* 1024 auto-ghz operations per loop, total 2048 instructions. */ + AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128; + AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128; + AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128; + AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128; } - while (--buflen); + while (buflen); } static struct bench_ops auto_ghz_detect_ops = { |