/* * Copyright (c) 2021 Intel. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include "cpu.h" #include "dp-packet.h" #include "dpif-netdev-private-dpcls.h" #include "dpif-netdev-private-extract.h" #include "dpif-netdev-private-thread.h" #include "flow.h" #include "openvswitch/vlog.h" #include "ovs-thread.h" #include "util.h" VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract); /* Variable to hold the default MFEX implementation. */ static ATOMIC(miniflow_extract_func) default_mfex_func; #if MFEX_IMPL_AVX512_CHECK static int32_t avx512_isa_probe(bool needs_vbmi) { static enum ovs_cpu_isa isa_required[] = { OVS_CPU_ISA_X86_AVX512F, OVS_CPU_ISA_X86_AVX512BW, OVS_CPU_ISA_X86_BMI2, }; for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { if (!cpu_has_isa(isa_required[i])) { return -ENOTSUP; } } if (needs_vbmi && !cpu_has_isa(OVS_CPU_ISA_X86_AVX512VBMI)) { return -ENOTSUP; } return 0; } /* Probe functions to check ISA requirements. */ static int32_t mfex_avx512_probe(void) { return avx512_isa_probe(false); } #if HAVE_AVX512VBMI static int32_t mfex_avx512_vbmi_probe(void) { return avx512_isa_probe(true); } #endif #endif /* Implementations of available extract options and * the implementations are always in order of preference. */ static struct dpif_miniflow_extract_impl mfex_impls[] = { [MFEX_IMPL_AUTOVALIDATOR] = { .probe = NULL, .extract_func = dpif_miniflow_extract_autovalidator, .name = "autovalidator", }, [MFEX_IMPL_SCALAR] = { .probe = NULL, .extract_func = NULL, .name = "scalar", }, [MFEX_IMPL_STUDY] = { .probe = NULL, .extract_func = mfex_study_traffic, .name = "study", }, /* Compile in implementations only if the compiler ISA checks pass. */ #if MFEX_IMPL_AVX512_CHECK #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_IPv4_UDP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_ip_udp, .name = "avx512_vbmi_ipv4_udp", }, #endif [MFEX_IMPL_IPv4_UDP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_ip_udp, .name = "avx512_ipv4_udp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_IPv4_TCP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_ip_tcp, .name = "avx512_vbmi_ipv4_tcp", }, #endif [MFEX_IMPL_IPv4_TCP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_ip_tcp, .name = "avx512_ipv4_tcp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_DOT1Q_IPv4_UDP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_dot1q_ip_udp, .name = "avx512_vbmi_dot1q_ipv4_udp", }, #endif [MFEX_IMPL_DOT1Q_IPv4_UDP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_dot1q_ip_udp, .name = "avx512_dot1q_ipv4_udp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_DOT1Q_IPv4_TCP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_dot1q_ip_tcp, .name = "avx512_vbmi_dot1q_ipv4_tcp", }, #endif [MFEX_IMPL_DOT1Q_IPv4_TCP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_dot1q_ip_tcp, .name = "avx512_dot1q_ipv4_tcp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_IPv6_UDP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_ipv6_udp, .name = "avx512_vbmi_ipv6_udp", }, #endif [MFEX_IMPL_IPv6_UDP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_ipv6_udp, .name = "avx512_ipv6_udp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_IPv6_TCP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_ipv6_tcp, .name = "avx512_vbmi_ipv6_tcp", }, #endif [MFEX_IMPL_IPv6_TCP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_ipv6_tcp, .name = "avx512_ipv6_tcp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_DOT1Q_IPv6_TCP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_dot1q_ipv6_tcp, .name = "avx512_vbmi_avx512_dot1q_ipv6_tcp", }, #endif [MFEX_IMPL_DOT1Q_IPv6_TCP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_dot1q_ipv6_tcp, .name = "avx512_dot1q_ipv6_tcp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_DOT1Q_IPv6_UDP] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_dot1q_ipv6_udp, .name = "avx512_vbmi_avx512_dot1q_ipv6_udp", }, #endif [MFEX_IMPL_DOT1Q_IPv6_UDP] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_dot1q_ipv6_udp, .name = "avx512_dot1q_ipv6_udp", }, #if HAVE_AVX512VBMI [MFEX_IMPL_VBMI_IPv4_NVGRE] = { .probe = mfex_avx512_vbmi_probe, .extract_func = mfex_avx512_vbmi_ip_nvgre, .name = "avx512_vbmi_ipv4_nvgre", }, #endif [MFEX_IMPL_IPv4_NVGRE] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_ip_nvgre, .name = "avx512_ipv4_nvgre", }, #endif }; BUILD_ASSERT_DECL(MFEX_IMPL_MAX == ARRAY_SIZE(mfex_impls)); void dpif_miniflow_extract_init(void) { atomic_uintptr_t *mfex_func = (void *)&default_mfex_func; #ifdef MFEX_AUTOVALIDATOR_DEFAULT int mfex_idx = MFEX_IMPL_AUTOVALIDATOR; #else int mfex_idx = MFEX_IMPL_SCALAR; #endif /* Call probe on each impl, and cache the result. */ for (int i = 0; i < MFEX_IMPL_MAX; i++) { bool avail = true; if (mfex_impls[i].probe) { /* Return zero is success, non-zero means error. */ avail = (mfex_impls[i].probe() == 0); } VLOG_DBG("Miniflow Extract implementation '%s' %s available.", mfex_impls[i].name, avail ? "is" : "is not"); mfex_impls[i].available = avail; } /* For the first call, this will be choosen based on the * compile time flag. */ VLOG_INFO("Default MFEX Extract implementation is %s.\n", mfex_impls[mfex_idx].name); atomic_store_relaxed(mfex_func, (uintptr_t) mfex_impls [mfex_idx].extract_func); } miniflow_extract_func dp_mfex_impl_get_default(void) { miniflow_extract_func return_func; atomic_uintptr_t *mfex_func = (void *)&default_mfex_func; atomic_read_relaxed(mfex_func, (uintptr_t *) &return_func); return return_func; } int dp_mfex_impl_set_default_by_name(const char *name) { miniflow_extract_func new_default; atomic_uintptr_t *mfex_func = (void *)&default_mfex_func; int err = dp_mfex_impl_get_by_name(name, &new_default); if (!err) { atomic_store_relaxed(mfex_func, (uintptr_t) new_default); } return err; } void dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list, size_t pmd_list_size) { /* Add all MFEX functions to reply string. */ ds_put_cstr(reply, "Available MFEX implementations:\n"); for (int i = 0; i < MFEX_IMPL_MAX; i++) { ds_put_format(reply, " %s (available: %s pmds: ", mfex_impls[i].name, mfex_impls[i].available ? "True" : "False"); for (size_t j = 0; j < pmd_list_size; j++) { struct dp_netdev_pmd_thread *pmd = pmd_list[j]; if (pmd->core_id == NON_PMD_CORE_ID) { continue; } if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) { ds_put_format(reply, "%u,", pmd->core_id); } } ds_chomp(reply, ','); if (ds_last(reply) == ' ') { ds_put_cstr(reply, "none"); } ds_put_cstr(reply, ")\n"); } } /* This function checks all available MFEX implementations, and selects and * returns the function pointer to the one requested by "name". If nothing * is found it returns error. */ int dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func) { if (!name || !out_func) { return -EINVAL; } for (int i = 0; i < MFEX_IMPL_MAX; i++) { if (strcmp(mfex_impls[i].name, name) == 0) { /* Check available is set before exec. */ if (!mfex_impls[i].available) { *out_func = NULL; return -ENODEV; } *out_func = mfex_impls[i].extract_func; return 0; } } return -ENOENT; } struct dpif_miniflow_extract_impl * dpif_mfex_impl_info_get(void) { return mfex_impls; } uint32_t dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets, struct netdev_flow_key *keys, uint32_t keys_size, odp_port_t in_port, struct dp_netdev_pmd_thread *pmd_handle) { const size_t cnt = dp_packet_batch_size(packets); uint16_t good_l2_5_ofs[NETDEV_MAX_BURST]; uint16_t good_l3_ofs[NETDEV_MAX_BURST]; uint16_t good_l4_ofs[NETDEV_MAX_BURST]; uint16_t good_l2_pad_size[NETDEV_MAX_BURST]; struct dp_packet *packet; struct dp_netdev_pmd_thread *pmd = pmd_handle; struct netdev_flow_key test_keys[NETDEV_MAX_BURST]; if (keys_size < cnt) { atomic_store_relaxed(&pmd->miniflow_extract_opt, NULL); VLOG_ERR("Invalid key size supplied, Key_size: %d less than" "batch_size: %" PRIuSIZE"\n", keys_size, cnt); VLOG_ERR("Autovalidatior is disabled.\n"); return 0; } /* Run scalar miniflow_extract to get default result. */ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { pkt_metadata_init(&packet->md, in_port); miniflow_extract(packet, &keys[i].mf); /* Store known good metadata to compare with optimized metadata. */ good_l2_5_ofs[i] = packet->l2_5_ofs; good_l3_ofs[i] = packet->l3_ofs; good_l4_ofs[i] = packet->l4_ofs; good_l2_pad_size[i] = packet->l2_pad_size; } uint32_t batch_failed = 0; /* Iterate through each version of miniflow implementations. */ for (int j = MFEX_IMPL_START_IDX; j < MFEX_IMPL_MAX; j++) { if (!mfex_impls[j].available) { continue; } /* Reset keys and offsets before each implementation. */ memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key)); DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { dp_packet_reset_offsets(packet); } /* Call optimized miniflow for each batch of packet. */ uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys, keys_size, in_port, pmd_handle); /* Do a miniflow compare for bits, blocks and offsets for all the * classified packets in the hitmask marked by set bits. */ while (hit_mask) { /* Index for the set bit. */ uint32_t i = raw_ctz(hit_mask); /* Set the index in hitmask to Zero. */ hit_mask &= (hit_mask - 1); uint32_t failed = 0; struct ds log_msg = DS_EMPTY_INITIALIZER; ds_put_format(&log_msg, "MFEX autovalidator pkt %d\n", i); /* Check miniflow bits are equal. */ if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) || (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) { ds_put_format(&log_msg, "Autovalidation map failed\n" "Good: 0x%llx 0x%llx Test: 0x%llx 0x%llx\n", keys[i].mf.map.bits[0], keys[i].mf.map.bits[1], test_keys[i].mf.map.bits[0], test_keys[i].mf.map.bits[1]); failed = 1; } if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) { uint32_t block_cnt = miniflow_n_values(&keys[i].mf); uint32_t test_block_cnt = miniflow_n_values(&test_keys[i].mf); ds_put_format(&log_msg, "Autovalidation blocks failed\n" "Good hex:\n"); ds_put_hex_dump(&log_msg, &keys[i].buf, block_cnt * 8, 0, false); ds_put_format(&log_msg, "Test hex:\n"); ds_put_hex_dump(&log_msg, &test_keys[i].buf, test_block_cnt * 8, 0, false); failed = 1; } packet = packets->packets[i]; if ((packet->l2_pad_size != good_l2_pad_size[i]) || (packet->l2_5_ofs != good_l2_5_ofs[i]) || (packet->l3_ofs != good_l3_ofs[i]) || (packet->l4_ofs != good_l4_ofs[i])) { ds_put_format(&log_msg, "Autovalidation packet offsets failed\n"); ds_put_format(&log_msg, "Good offsets: " "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", " "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n", good_l2_pad_size[i], good_l2_5_ofs[i], good_l3_ofs[i], good_l4_ofs[i]); ds_put_format(&log_msg, "Test offsets: " "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", " "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n", packet->l2_pad_size, packet->l2_5_ofs, packet->l3_ofs, packet->l4_ofs); failed = 1; } if (failed) { VLOG_ERR("Autovalidation for %s failed in pkt %d," " disabling.", mfex_impls[j].name, i); VLOG_ERR("Autovalidation failure details:\n%s", ds_cstr(&log_msg)); batch_failed = 1; } ds_destroy(&log_msg); } } /* Having dumped the debug info for the batch, disable autovalidator. */ if (batch_failed) { atomic_store_relaxed(&pmd->miniflow_extract_opt, NULL); } /* Preserve packet correctness by storing back the good offsets in * packets back. */ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { packet->l2_5_ofs = good_l2_5_ofs[i]; packet->l3_ofs = good_l3_ofs[i]; packet->l4_ofs = good_l4_ofs[i]; packet->l2_pad_size = good_l2_pad_size[i]; } /* Returning zero implies no packets were hit by autovalidation. This * simplifies unit-tests as changing --enable-mfex-default-autovalidator * would pass/fail. By always returning zero, autovalidator is a little * slower, but we gain consistency in testing. The auto-validator is only * meant to test different implementaions against a batch of packets * without incrementing hit counters. */ return 0; }