From eb5ce0ca4206ed4f74009c1b9a3a72407693448b Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Thu, 4 Sep 2014 18:29:58 +0200 Subject: configure: add ARM/AArch64 NEON support Checks for arm_neon.h header. --- configure.ac | 21 +++++++++++++++++++++ include/gf_complete.h | 4 ++++ m4/ax_ext.m4 | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/configure.ac b/configure.ac index 47d5d62..31ab1fa 100644 --- a/configure.ac +++ b/configure.ac @@ -24,6 +24,27 @@ AC_PROG_CC AX_EXT() +AC_ARG_ENABLE([neon], + AS_HELP_STRING([--disable-neon], [Build without NEON optimizations])) + +AS_IF([test "x$enable_neon" != "xno"], + [noneon_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $SIMD_FLAGS" + AC_CHECK_HEADER([arm_neon.h], + [have_neon=yes], + [have_neon=no + CPPFLAGS=$noneon_CPPFLAGS])], + [have_neon=no + AS_IF([test "x$ax_cv_have_neon_ext" = "xyes"], + [SIMD_FLAGS=""]) + ]) + +AS_IF([test "x$have_neon" = "xno"], + [AS_IF([test "x$enable_neon" = "xyes"], + [AC_MSG_ERROR([neon requested but arm_neon.h not found])]) + ]) +AM_CONDITIONAL([HAVE_NEON], [test "x$have_neon" = "xyes"]) + AC_ARG_ENABLE([sse], AS_HELP_STRING([--disable-sse], [Build without SSE optimizations]), [if test "x$enableval" = "xno" ; then diff --git a/include/gf_complete.h b/include/gf_complete.h index e8ea2ca..c4783e8 100644 --- a/include/gf_complete.h +++ b/include/gf_complete.h @@ -33,6 +33,10 @@ #include #endif +#if defined(ARM_NEON) + #include <arm_neon.h> +#endif + /* These are the different ways to perform multiplication. Not all are implemented for all values of w. 
diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4 index cfbb797..c03ccef 100644 --- a/m4/ax_ext.m4 +++ b/m4/ax_ext.m4 @@ -41,6 +41,55 @@ AC_DEFUN([AX_EXT], AC_REQUIRE([AC_CANONICAL_HOST]) case $host_cpu in + aarch64*) + AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64]) + SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64" + + AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext], + [ + # TODO: detect / cross-compile + ax_cv_have_neon_ext=yes + ]) + AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext], + [ + # TODO: detect / cross-compile + ax_cv_have_arm_crypt_ext=yes + ]) + + if test "$ax_cv_have_arm_crypt_ext" = yes; then + AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension]) + fi + + if test "$ax_cv_have_neon_ext" = yes; then + AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) + fi + + if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto, + SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", []) + elif test "$ax_cv_have_arm_crypt_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto, + SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", []) + elif test "$ax_cv_have_neon_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, + SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", []) + fi + ;; + + arm*) + AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext], + [ + # TODO: detect / cross-compile + ax_cv_have_neon_ext=yes + ]) + + if test "$ax_cv_have_neon_ext" = yes; then + AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) + AX_CHECK_COMPILE_FLAG(-mfpu=neon, + SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", []) + fi + ;; + powerpc*) AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext], [ -- cgit v1.2.1