summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2020-07-23 18:21:17 +0300
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2020-07-23 22:49:26 +0300
commit f1c3db3bf40e07cfd1a6a92209865ee7a98129ca (patch)
tree 2c34f257c6165549a1fed3c53c2eb38106b04734
parent 886120f33bd3f10e6e6a09920eca1f9ed81044e7 (diff)
download libgcrypt-f1c3db3bf40e07cfd1a6a92209865ee7a98129ca.tar.gz
tests/bench-slope: improve CPU frequency auto-detection
* configure.ac (gcry_cv_have_asm_volatile_memory): Check also if
assembly memory barrier with input/output register is supported.
* tests/bench-slope.c (auto_ghz_bench): Change to use base operation
that takes two CPU cycles and unroll loop by 1024 operations.
--

CPU frequency is now correctly detected on AWS Graviton CPU (2.3 GHz).

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- configure.ac 11
-rw-r--r-- tests/bench-slope.c 57
2 files changed, 58 insertions, 10 deletions
diff --git a/configure.ac b/configure.ac
index 9a5359c2..96a18d19 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1117,7 +1117,11 @@ if test "$gcry_cv_have_asm" = "no" ; then
[gcry_cv_have_asm_volatile_memory],
[gcry_cv_have_asm_volatile_memory=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[void a(void) { __asm__ volatile("":::"memory"); }]])],
+ [[void a(int x)
+ {
+ __asm__ volatile("":::"memory");
+ __asm__ volatile("":"+r"(x)::"memory");
+ }]])],
[gcry_cv_have_asm_volatile_memory=yes])])
fi
else
@@ -1125,7 +1129,10 @@ else
[gcry_cv_have_asm_volatile_memory],
[gcry_cv_have_asm_volatile_memory=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[void a(void) { asm volatile("":::"memory"); }]])],
+ [[void a(int x)
+ {
+ asm volatile("":::"memory");
+ asm volatile("":"+r"(x)::"memory"); }]])],
[gcry_cv_have_asm_volatile_memory=yes])])
fi
if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
diff --git a/tests/bench-slope.c b/tests/bench-slope.c
index 63f8f7ae..cfb3dd66 100644
--- a/tests/bench-slope.c
+++ b/tests/bench-slope.c
@@ -509,18 +509,59 @@ auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen)
* function will give cycles/iteration result 1024.0 on high-end CPUs.
* With turbo, result will be less and can be used to detect turbo-clock. */
- do
- {
#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
- /* Use memory barrier to prevent compiler from optimizing this loop
- * away. */
-
- asm volatile ("":::"memory");
+ /* Auto-ghz operation takes two CPU cycles to perform. Memory barriers
+ * are used to prevent compiler from optimizing this loop away. */
+ #define AUTO_GHZ_OPERATION \
+ asm volatile ("":"+r"(buflen)::"memory"); \
+ buflen ^= 1; \
+ asm volatile ("":"+r"(buflen)::"memory"); \
+ buflen -= 2
#else
- /* TODO: Needs alternative way. */
+ /* TODO: Needs alternative way of preventing compiler optimizations.
+ * Mix of XOR and subtraction appears to do the trick for now. */
+ #define AUTO_GHZ_OPERATION \
+ buflen ^= 1; \
+ buflen -= 2
#endif
+
+#define AUTO_GHZ_OPERATION_2 \
+ AUTO_GHZ_OPERATION; \
+ AUTO_GHZ_OPERATION
+
+#define AUTO_GHZ_OPERATION_4 \
+ AUTO_GHZ_OPERATION_2; \
+ AUTO_GHZ_OPERATION_2
+
+#define AUTO_GHZ_OPERATION_8 \
+ AUTO_GHZ_OPERATION_4; \
+ AUTO_GHZ_OPERATION_4
+
+#define AUTO_GHZ_OPERATION_16 \
+ AUTO_GHZ_OPERATION_8; \
+ AUTO_GHZ_OPERATION_8
+
+#define AUTO_GHZ_OPERATION_32 \
+ AUTO_GHZ_OPERATION_16; \
+ AUTO_GHZ_OPERATION_16
+
+#define AUTO_GHZ_OPERATION_64 \
+ AUTO_GHZ_OPERATION_32; \
+ AUTO_GHZ_OPERATION_32
+
+#define AUTO_GHZ_OPERATION_128 \
+ AUTO_GHZ_OPERATION_64; \
+ AUTO_GHZ_OPERATION_64
+
+ do
+ {
+ /* 1024 auto-ghz operations per loop, total 2048 instructions. */
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
}
- while (--buflen);
+ while (buflen);
}
static struct bench_ops auto_ghz_detect_ops = {