diff options
author | David Marchand <david.marchand@redhat.com> | 2021-11-10 17:53:41 +0100 |
---|---|---|
committer | Ilya Maximets <i.maximets@ovn.org> | 2022-01-11 21:34:56 +0100 |
commit | b84386fa9ac801adad6e725e0de43d9340619e67 (patch) | |
tree | 31a2cbab1e5925622a3c3ae6eee596c2c6dc70ae | |
parent | 356f3620686a557d0e8976cdf73472f03d4f4d07 (diff) | |
download | openvswitch-b84386fa9ac801adad6e725e0de43d9340619e67.tar.gz |
dpdk: Support running PMD threads on any core.
Previously in OVS, a PMD thread running on cpu X used lcore X.
This assumption limited OVS to running PMD threads on physical cpus whose
id is lower than RTE_MAX_LCORE.
DPDK 20.08 introduced a new API that associates a non-EAL thread to a free
lcore. This new API does not change the thread characteristics (like CPU
affinity) and lets OVS run its PMD threads on any cpu regardless of
RTE_MAX_LCORE.
The DPDK multiprocess feature is not compatible with this new API and is
disabled.
DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64)
which should be enough for OVS pmd threads (hopefully).
The DPDK lcore/OVS PMD thread mapping is logged when trying to attach an
OVS PMD thread, and when detaching it.
A new command is added to show DPDK's own view of its lcores at any
time:
$ ovs-appctl dpdk/lcore-list
lcore 0, socket 0, role RTE, cpuset 0
lcore 1, socket 0, role NON_EAL, cpuset 1
lcore 2, socket 0, role NON_EAL, cpuset 15
Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Kevin Traynor <ktraynor@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
-rw-r--r-- | Documentation/howto/dpdk.rst | 5 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | lib/dpdk-stub.c | 9 | ||||
-rw-r--r-- | lib/dpdk-unixctl.man | 3 | ||||
-rw-r--r-- | lib/dpdk.c | 51 | ||||
-rw-r--r-- | lib/dpdk.h | 3 | ||||
-rw-r--r-- | lib/dpif-netdev.c | 10 |
7 files changed, 71 insertions, 12 deletions
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index 70b64881a..81f236d3b 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -402,6 +402,11 @@ Supported actions for hardware offload are: - Clone/output (tnl_push and output) for encapsulating over a tunnel. - Tunnel pop, for packets received on physical ports. +Multiprocess +------------ + +This DPDK feature is not supported and disabled during OVS initialization. + Further Reading --------------- @@ -17,6 +17,8 @@ Post-v2.16.0 * Add hardware offload support for GRE flows (experimental). Available only if DPDK experimantal APIs enabled during the build. * Add support for DPDK 21.11. + * Forbid use of DPDK multiprocess feature. + * Add support for running threads on cores >= RTE_MAX_LCORE. - Python: * For SSL support, the use of the pyOpenSSL library has been replaced with the native 'ssl' module. diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c index c332c217c..3eee1f485 100644 --- a/lib/dpdk-stub.c +++ b/lib/dpdk-stub.c @@ -38,10 +38,15 @@ dpdk_init(const struct smap *ovs_other_config) } } +bool +dpdk_attach_thread(unsigned cpu OVS_UNUSED) +{ + return false; +} + void -dpdk_set_lcore_id(unsigned cpu OVS_UNUSED) +dpdk_detach_thread(void) { - /* Nothing */ } const char * diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man index a0d1fa2ea..5bac80638 100644 --- a/lib/dpdk-unixctl.man +++ b/lib/dpdk-unixctl.man @@ -1,5 +1,8 @@ .SS "DPDK COMMANDS" These commands manage DPDK components. +.IP "\fBdpdk/lcore-list\fR" +Lists the DPDK lcores and their cpu affinity. +When RTE_MAX_LCORE lcores are registered, some OVS PMD threads won't appear. .IP "\fBdpdk/log-list\fR" Lists all DPDK components that emit logs and their logging levels. 
.IP "\fBdpdk/log-set\fR [\fIspec\fR]" diff --git a/lib/dpdk.c b/lib/dpdk.c index 38f142017..6886fbd9d 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -35,6 +35,7 @@ #include "netdev-offload-provider.h" #include "openvswitch/dynamic-string.h" #include "openvswitch/vlog.h" +#include "ovs-atomic.h" #include "ovs-numa.h" #include "smap.h" #include "svec.h" @@ -50,10 +51,11 @@ static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ static bool vhost_iommu_enabled = false; /* Status of vHost IOMMU support */ static bool vhost_postcopy_enabled = false; /* Status of vHost POSTCOPY * support. */ -static bool dpdk_initialized = false; /* Indicates successful initialization - * of DPDK. */ static bool per_port_memory = false; /* Status of per port memory support */ +/* Indicates successful initialization of DPDK. */ +static atomic_bool dpdk_initialized = ATOMIC_VAR_INIT(false); + static int process_vhost_flags(char *flag, const char *default_val, int size, const struct smap *ovs_other_config, @@ -477,6 +479,12 @@ dpdk_init__(const struct smap *ovs_other_config) return false; } + if (!rte_mp_disable()) { + VLOG_EMER("Could not disable multiprocess, DPDK won't be available."); + rte_eal_cleanup(); + return false; + } + if (VLOG_IS_DBG_ENABLED()) { size_t size; char *response = NULL; @@ -496,6 +504,8 @@ dpdk_init__(const struct smap *ovs_other_config) } } + unixctl_command_register("dpdk/lcore-list", "", 0, 0, + dpdk_unixctl_mem_stream, rte_lcore_dump); unixctl_command_register("dpdk/log-list", "", 0, 0, dpdk_unixctl_mem_stream, rte_log_dump); unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0, @@ -545,7 +555,7 @@ dpdk_init(const struct smap *ovs_other_config) } else { VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable"); } - dpdk_initialized = enabled; + atomic_store_relaxed(&dpdk_initialized, enabled); } const char * @@ -575,15 +585,40 @@ dpdk_per_port_memory(void) bool dpdk_available(void) { - return dpdk_initialized; + bool 
initialized; + + atomic_read_relaxed(&dpdk_initialized, &initialized); + return initialized; } -void -dpdk_set_lcore_id(unsigned cpu) +bool +dpdk_attach_thread(unsigned cpu) { /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */ ovs_assert(cpu != NON_PMD_CORE_ID); - RTE_PER_LCORE(_lcore_id) = cpu; + + if (!dpdk_available()) { + return false; + } + + if (rte_thread_register() < 0) { + VLOG_WARN("DPDK max threads count has been reached. " + "PMD thread performance may be impacted."); + return false; + } + + VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id()); + return true; +} + +void +dpdk_detach_thread(void) +{ + unsigned int lcore_id; + + lcore_id = rte_lcore_id(); + rte_thread_unregister(); + VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id); } void @@ -596,7 +631,7 @@ void dpdk_status(const struct ovsrec_open_vswitch *cfg) { if (cfg) { - ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_initialized); + ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_available()); ovsrec_open_vswitch_set_dpdk_version(cfg, rte_version()); } } diff --git a/lib/dpdk.h b/lib/dpdk.h index 2eb1aedbb..64ebca47d 100644 --- a/lib/dpdk.h +++ b/lib/dpdk.h @@ -36,7 +36,8 @@ struct smap; struct ovsrec_open_vswitch; void dpdk_init(const struct smap *ovs_other_config); -void dpdk_set_lcore_id(unsigned cpu); +bool dpdk_attach_thread(unsigned cpu); +void dpdk_detach_thread(void); const char *dpdk_get_vhost_sock_dir(void); bool dpdk_vhost_iommu_enabled(void); bool dpdk_vhost_postcopy_enabled(void); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1efeab622..649c700cb 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -6390,6 +6390,7 @@ pmd_thread_main(void *f_) unsigned int lc = 0; struct polled_queue *poll_list; bool wait_for_reload = false; + bool dpdk_attached; bool reload_tx_qid; bool exiting; bool reload; @@ -6402,7 +6403,7 @@ pmd_thread_main(void *f_) /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. 
*/ ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); ovs_numa_thread_setaffinity_core(pmd->core_id); - dpdk_set_lcore_id(pmd->core_id); + dpdk_attached = dpdk_attach_thread(pmd->core_id); poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); dfc_cache_init(&pmd->flow_cache); pmd_alloc_static_tx_qid(pmd); @@ -6410,6 +6411,10 @@ pmd_thread_main(void *f_) reload: atomic_count_init(&pmd->pmd_overloaded, 0); + if (!dpdk_attached) { + dpdk_attached = dpdk_attach_thread(pmd->core_id); + } + /* List port/core affinity */ for (i = 0; i < poll_cnt; i++) { VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n", @@ -6546,6 +6551,9 @@ reload: dfc_cache_uninit(&pmd->flow_cache); free(poll_list); pmd_free_cached_ports(pmd); + if (dpdk_attached) { + dpdk_detach_thread(); + } return NULL; } |