summaryrefslogtreecommitdiff
path: root/m4/ax_ext.m4
diff options
context:
space:
mode:
authorbassamtabbara <bassam.tabbara@quantum.com>2016-09-14 20:22:27 +0000
committerbassamtabbara <bassam.tabbara@quantum.com>2016-09-14 20:22:27 +0000
commita6847973cba329ae079d3bd26341a4ec2906f012 (patch)
treecbdb3947d9d86f2fa7d9cee84d3b773e1bb8f2b2 /m4/ax_ext.m4
parent185295f247698f727fd3bb11c4795e1741bb359e (diff)
parent0690ba86a81faff99a3383b5907ddc02a317eea0 (diff)
downloadgf-complete-a6847973cba329ae079d3bd26341a4ec2906f012.tar.gz
Merge branch 'simd-runtime-detection' into 'master'
Support for runtime detection of SIMD This merge request adds support for runtime SIMD detection. The idea is that you would build gf-complete with full SIMD support, and gf_init will select the appropriate function at runtime based on the capabilities of the target machine. This would eliminate the need to build different versions of the code for different processors (you still need to build for different archs). Ceph for example has 3-4 flavors of jerasure on Intel (and does not support PCLMUL optimizations as a result of using to many binaries). Numerous libraries have followed as similar approach include zlib. When reviewing this merge request I recommend that you look at each of the 5 commits independently. The first 3 commits don't change the existing logic. Instead they add debugging functions and test scripts that facilitate testing of the 4th and commit. The 4th commit is where all the new logic goes along with tests. The 5th commit fixes build scripts. I've tested this on x86_64, arm, and aarch64 using QEMU. Numerous tests have been added that help this code and could help with future testing of gf-complete. Also I've compared the functions selected with the old code (prior to runtime SIMD support) with the new code and all functions are identical. Here's a gist with the test results prior to SIMD extensions: https://gist.github.com/bassamtabbara/d9a6dcf0a749b7ab01bc2953a359edec. See merge request !18
Diffstat (limited to 'm4/ax_ext.m4')
-rw-r--r--m4/ax_ext.m4295
1 files changed, 31 insertions, 264 deletions
diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4
index c03ccef..95c4dbe 100644
--- a/m4/ax_ext.m4
+++ b/m4/ax_ext.m4
@@ -1,40 +1,7 @@
#
-# Updated by KMG to support -DINTEL_SSE for GF-Complete
+# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html
+# but simplified to do compile time SIMD checks only
#
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_ext.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_EXT
-#
-# DESCRIPTION
-#
-# Find supported SIMD extensions by requesting cpuid. When an SIMD
-# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
-# compiler supports it. For example, if "sse2" is available, then "-msse2"
-# is added to SIMD_FLAGS.
-#
-# This macro calls:
-#
-# AC_SUBST(SIMD_FLAGS)
-#
-# And defines:
-#
-# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
-#
-# LICENSE
-#
-# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
-# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
-#
-# Copying and distribution of this file, with or without modification, are
-# permitted in any medium without royalty provided the copyright notice
-# and this notice are preserved. This file is offered as-is, without any
-# warranty.
-
-#serial 12
AC_DEFUN([AX_EXT],
[
@@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT],
AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
- AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_neon_ext=yes
- ])
- AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_arm_crypt_ext=yes
- ])
-
- if test "$ax_cv_have_arm_crypt_ext" = yes; then
- AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
- fi
-
+ AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
- AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
-
- if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
- elif test "$ax_cv_have_arm_crypt_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
- elif test "$ax_cv_have_neon_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
- fi
- ;;
+ ;;
arm*)
- AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_neon_ext=yes
- ])
-
+ AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
- AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
- AX_CHECK_COMPILE_FLAG(-mfpu=neon,
- SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
+ AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
- ;;
+ ;;
powerpc*)
- AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
- [
- if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
- if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
- ax_cv_have_altivec_ext=yes
- fi
- fi
- ])
-
- if test "$ax_cv_have_altivec_ext" = yes; then
- AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
- AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
- fi
- ;;
-
-
- i[[3456]]86*|x86_64*|amd64*)
-
- AC_REQUIRE([AX_GCC_X86_CPUID])
- AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
-
- AX_GCC_X86_CPUID(0x00000001)
- ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
- edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
-
- AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
- [
- ax_cv_have_mmx_ext=no
- if test "$((0x$edx>>23&0x01))" = 1; then
- ax_cv_have_mmx_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
- [
- ax_cv_have_sse_ext=no
- if test "$((0x$edx>>25&0x01))" = 1; then
- ax_cv_have_sse_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
- [
- ax_cv_have_sse2_ext=no
- if test "$((0x$edx>>26&0x01))" = 1; then
- ax_cv_have_sse2_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
- [
- ax_cv_have_sse3_ext=no
- if test "$((0x$ecx&0x01))" = 1; then
- ax_cv_have_sse3_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
- [
- ax_cv_have_pclmuldq_ext=no
- if test "$((0x$ecx>>1&0x01))" = 1; then
- ax_cv_have_pclmuldq_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
- [
- ax_cv_have_ssse3_ext=no
- if test "$((0x$ecx>>9&0x01))" = 1; then
- ax_cv_have_ssse3_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
- [
- ax_cv_have_sse41_ext=no
- if test "$((0x$ecx>>19&0x01))" = 1; then
- ax_cv_have_sse41_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
- [
- ax_cv_have_sse42_ext=no
- if test "$((0x$ecx>>20&0x01))" = 1; then
- ax_cv_have_sse42_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
- [
- ax_cv_have_avx_cpu_ext=no
- if test "$((0x$ecx>>28&0x01))" = 1; then
- ax_cv_have_avx_cpu_ext=yes
- fi
- ])
-
- if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
- AX_GCC_X86_AVX_XGETBV(0x00000000)
-
- xgetbv_eax="0"
- if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
- xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
- fi
-
- AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
- [
- ax_cv_have_avx_ext=no
-
- if test "$((0x$ecx>>27&0x01))" = 1; then
- if test "$((0x$xgetbv_eax&0x6))" = 6; then
- ax_cv_have_avx_ext=yes
- fi
- fi
- ])
- if test x"$ax_cv_have_avx_ext" = x"no"; then
- AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
- fi
+ AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes])
+ if test "$ax_cv_have_altivec_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no])
fi
+ ;;
- if test "$ax_cv_have_mmx_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
- if test x"$ax_cv_support_mmx_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mmmx"
- AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
- else
- AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
- fi
- fi
+ i[[3456]]86*|x86_64*|amd64*)
+ AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes])
if test "$ax_cv_have_sse_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
- if test x"$ax_cv_support_sse_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
- AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
- else
- AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no])
fi
+ AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes])
if test "$ax_cv_have_sse2_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
- if test x"$ax_cv_support_sse2_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
- AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
- else
- AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no])
fi
+ AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes])
if test "$ax_cv_have_sse3_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
- if test x"$ax_cv_support_sse3_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
- AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
- else
- AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
- fi
- fi
-
- if test "$ax_cv_have_pclmuldq_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
- if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
- AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])
- else
- AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no])
fi
+ AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes])
if test "$ax_cv_have_ssse3_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
- if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
- AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
- else
- AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no])
fi
- if test "$ax_cv_have_sse41_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
- if test x"$ax_cv_support_sse41_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
- AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
- else
- AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes])
+ if test "$ax_cv_have_pclmuldq_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no])
fi
- if test "$ax_cv_have_sse42_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
- if test x"$ax_cv_support_sse42_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
- AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
- else
- AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes])
+ if test "$ax_cv_have_sse41_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no])
fi
- if test "$ax_cv_have_avx_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
- if test x"$ax_cv_support_avx_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mavx"
- AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
- else
- AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes])
+ if test "$ax_cv_have_sse42_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no])
fi
-
- ;;
+ ;;
esac
AC_SUBST(SIMD_FLAGS)