summaryrefslogtreecommitdiff
path: root/m4
diff options
context:
space:
mode:
authorHarry van Haaren <harry.van.haaren@intel.com>2020-07-13 13:42:14 +0100
committerIan Stokes <ian.stokes@intel.com>2020-07-13 14:55:25 +0100
commit352b6c7116cdc096c879fc4fa9ed5fe9c2ccef3b (patch)
treea94ec56eb6f987d40d046d885b244866a1ac07f7 /m4
parentb250b39a7aa61881ded34ef1a0fffb6768fd7a49 (diff)
downloadopenvswitch-352b6c7116cdc096c879fc4fa9ed5fe9c2ccef3b.tar.gz
dpif-lookup: add avx512 gather implementation.
This commit adds an AVX-512 dpcls lookup implementation. It uses the AVX-512 SIMD ISA to perform multiple miniflow operations in parallel. To run this implementation, the "avx512f" and "bmi2" ISAs are required. These ISA checks are performed at runtime while probing the subtable implementation. If a CPU does not provide both "avx512f" and "bmi2", then this code does not execute. The avx512 code is built as a separate static library, with added CFLAGS to enable the required ISA features. By building only this static library with avx512 enabled, it is ensured that the main OVS core library is *not* using avx512, and that OVS continues to run as before on CPUs that do not support avx512. The approach taken in this implementation is to use the gather instruction to access the packet miniflow, allowing any miniflow blocks to be loaded into an AVX-512 register. This maximizes the usefulness of the register, and hence this implementation handles any subtable with up to 8 miniflow bits. Note that specialization of these avx512 lookup routines still provides performance value, as the hashing of the resulting data is performed in scalar code, and compile-time loop unrolling occurs when specialized to miniflow bits. This commit checks at configure time if the assembler in use has a known bug in assembling AVX512 code. If this bug is present, all AVX512 code is disabled. Checking the version string of the binutils or assembler is not a good method to detect the issue, as backported fixes would not be reflected. Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com> Acked-by: William Tu <u9012063@gmail.com> Signed-off-by: Ian Stokes <ian.stokes@intel.com>
Diffstat (limited to 'm4')
-rw-r--r--m4/openvswitch.m430
1 files changed, 30 insertions, 0 deletions
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index add3aabcc..7c9a507e5 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -404,6 +404,36 @@ AC_DEFUN([OVS_CHECK_SPHINX],
AC_ARG_VAR([SPHINXBUILD])
AM_CONDITIONAL([HAVE_SPHINX], [test "$SPHINXBUILD" != none])])
+dnl Checks for binutils/assembler known issue with AVX512.
+dnl Due to backports, we probe assembling a reproducer instead of checking
+dnl binutils version string. More details, including ASM dumps and debug here:
+dnl GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90028
+dnl The checking of binutils functionality instead of LD version is similar
+dnl to how DPDK proposes to solve this issue:
+dnl http://patches.dpdk.org/patch/71723/
+AC_DEFUN([OVS_CHECK_BINUTILS_AVX512],
+  [AC_CACHE_CHECK(
+    [binutils avx512 assembler checks passing],
+    [ovs_cv_binutils_avx512_good],
+    [dnl Only x86_64 is probed: "as --64" is not valid for other targets.
+     ovs_cv_binutils_avx512_good=no
+     if ($CC -dumpmachine | grep x86_64) >/dev/null 2>&1; then
+       mkdir -p build-aux
+       OBJFILE=build-aux/binutils_avx512_check.o
+       GATHER_PARAMS='0x8(,%ymm1,1),%ymm0{%k2}'
+       echo "vpgatherqq $GATHER_PARAMS" | as --64 -o $OBJFILE -
+       if (objdump -d --no-show-raw-insn $OBJFILE | grep -q $GATHER_PARAMS) >/dev/null 2>&1; then
+         ovs_cv_binutils_avx512_good=yes
+       fi
+       rm -f $OBJFILE
+     fi])
+   dnl Set the flag outside the cached section so a cache hit still applies it.
+   if test "$ovs_cv_binutils_avx512_good" = yes; then
+     CFLAGS="$CFLAGS -DHAVE_LD_AVX512_GOOD"
+   fi
+   AM_CONDITIONAL([HAVE_LD_AVX512_GOOD],
+                  [test "$ovs_cv_binutils_avx512_good" = yes])])
+
dnl Checks for dot.
AC_DEFUN([OVS_CHECK_DOT],
[AC_CACHE_CHECK(