summaryrefslogtreecommitdiff
path: root/lib/dpif-netdev-extract-avx512.c
diff options
context:
space:
mode:
authorCian Ferriter <cian.ferriter@intel.com>2022-05-17 10:08:18 +0000
committerIlya Maximets <i.maximets@ovn.org>2022-05-30 23:12:51 +0200
commitcb1c64007734cbaa4b23d3e569a550c0beaa4afd (patch)
tree1108e7f98439495e14598627e9292087b8bf0dd0 /lib/dpif-netdev-extract-avx512.c
parentfb85ae4340a51bea26b9a4099448a982834afeff (diff)
downloadopenvswitch-cb1c64007734cbaa4b23d3e569a550c0beaa4afd.tar.gz
acinclude: Add seperate checks for AVX512 ISA.
Checking for each of the required AVX512 ISA separately will allow the compiler to generate some AVX512 code where there is some support in the compiler rather than only generating all AVX512 code when all of it is supported or no AVX512 code at all. For example, in GCC 4.9 where there is just support for AVX512F, this patch will allow building the AVX512 DPIF. Another example, in GCC 5 and 6, most AVX512 code can be generated, just without AVX512VPOPCNTDQ support. Signed-off-by: Cian Ferriter <cian.ferriter@intel.com> Acked-by: Sunil Pai G <sunil.pai.g@intel.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'lib/dpif-netdev-extract-avx512.c')
-rw-r--r--lib/dpif-netdev-extract-avx512.c64
1 files changed, 46 insertions, 18 deletions
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 4a94dfcfd..12271be17 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -108,13 +108,42 @@ _mm512_maskz_permutex2var_epi8_skx(__mmask64 k_mask,
return v_result_kmskd;
}
-/* Wrapper function required to enable ISA. */
+/* Wrapper function to enable VBMI ISA required by the
+ * _mm512_maskz_permutexvar_epi8 intrinsic. */
+#if HAVE_AVX512VBMI
static inline __m512i
__attribute__((__target__("avx512vbmi")))
_mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
{
return _mm512_maskz_permutexvar_epi8(kmask, idx, a);
}
+#endif
+
+static inline __m512i
+_mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, __m512i v_shuf,
+ __m512i v_pkt0,
+ const uint32_t use_vbmi OVS_UNUSED)
+{
+ /* Permute the packet layout into miniflow blocks shape. */
+ __m512i v512_zeros = _mm512_setzero_si512();
+ __m512i v_blk0;
+#if HAVE_AVX512VBMI
+ if (__builtin_constant_p(use_vbmi) && use_vbmi) {
+ /* As different AVX512 ISA levels have different implementations,
+ * this specializes on the use_vbmi attribute passed in.
+ */
+ v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf, v_pkt0);
+
+ } else {
+ v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0, v_shuf,
+ v512_zeros);
+ }
+#else
+ v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0, v_shuf,
+ v512_zeros);
+#endif
+ return v_blk0;
+}
/* This file contains optimized implementations of miniflow_extract()
@@ -481,7 +510,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
odp_port_t in_port,
void *pmd_handle OVS_UNUSED,
const enum MFEX_PROFILES profile_id,
- const uint32_t use_vbmi)
+ const uint32_t use_vbmi OVS_UNUSED)
{
uint32_t hitmask = 0;
struct dp_packet *packet;
@@ -538,19 +567,9 @@ mfex_avx512_process(struct dp_packet_batch *packets,
_mm_storeu_si128((void *) bits, v_bits);
_mm_storeu_si128((void *) blocks, v_blocks01);
- /* Permute the packet layout into miniflow blocks shape.
- * As different AVX512 ISA levels have different implementations,
- * this specializes on the "use_vbmi" attribute passed in.
- */
- __m512i v512_zeros = _mm512_setzero_si512();
- __m512i v_blk0;
- if (__builtin_constant_p(use_vbmi) && use_vbmi) {
- v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf,
- v_pkt0);
- } else {
- v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0,
- v_shuf, v512_zeros);
- }
+ __m512i v_blk0 = _mm512_maskz_permutexvar_epi8_selector(k_shuf, v_shuf,
+ v_pkt0,
+ use_vbmi);
__m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
_mm512_storeu_si512(&blocks[2], v_blk0_strip);
@@ -629,7 +648,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
}
-#define DECLARE_MFEX_FUNC(name, profile) \
+#if HAVE_AVX512VBMI
+#define VBMI_MFEX_FUNC(name, profile) \
uint32_t \
__attribute__((__target__("avx512vbmi"))) \
mfex_avx512_vbmi_##name(struct dp_packet_batch *packets, \
@@ -639,8 +659,12 @@ mfex_avx512_vbmi_##name(struct dp_packet_batch *packets, \
{ \
return mfex_avx512_process(packets, keys, keys_size, in_port, \
pmd_handle, profile, 1); \
-} \
- \
+}
+#else
+#define VBMI_MFEX_FUNC(name, profile)
+#endif
+
+#define BASIC_MFEX_FUNC(name, profile) \
uint32_t \
mfex_avx512_##name(struct dp_packet_batch *packets, \
struct netdev_flow_key *keys, uint32_t keys_size, \
@@ -651,6 +675,10 @@ mfex_avx512_##name(struct dp_packet_batch *packets, \
pmd_handle, profile, 0); \
}
+#define DECLARE_MFEX_FUNC(name, profile) \
+VBMI_MFEX_FUNC(name, profile) \
+BASIC_MFEX_FUNC(name, profile) \
+
/* Each profile gets a single declare here, which specializes the function
* as required.
*/